Command line
python /home/saxelrod/Repo/projects/chemprop/chemprop/train.py --config_path /home/saxelrod/synthetic/confs_ffn_fixed/train/config.json --data_path /home/saxelrod/synthetic/confs/train_full.csv --dataset_type regression
Args
{'activation': 'ReLU',
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'batch_size': 50,
 'bias': False,
 'cache_cutoff': 10000,
 'checkpoint_dir': None,
 'checkpoint_path': None,
 'checkpoint_paths': None,
 'class_balance': False,
 'config_path': '/home/saxelrod/synthetic/confs_ffn_fixed/train/config.json',
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'cuda': True,
 'data_path': '/home/saxelrod/synthetic/confs/train_full.csv',
 'dataset_type': 'regression',
 'depth': 2,
 'device': device(type='cuda', index=1),
 'dropout': 0.0,
 'ensemble_size': 1,
 'epochs': 30,
 'extra_metrics': [],
 'features_generator': ['morgan'],
 'features_only': True,
 'features_path': None,
 'features_scaling': False,
 'features_size': None,
 'ffn_hidden_size': 1900,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'gpu': 1,
 'grad_clip': None,
 'hidden_size': 1900,
 'ignore_columns': None,
 'init_lr': 0.0001,
 'log_frequency': 10,
 'max_data_size': None,
 'max_lr': 0.001,
 'metric': 'mae',
 'metrics': ['mae'],
 'minimize_score': True,
 'mpn_shared': False,
 'multiclass_num_classes': 3,
 'no_cache_mol': False,
 'no_cuda': False,
 'no_features_scaling': True,
 'num_folds': 10,
 'num_lrs': 1,
 'num_tasks': 1,
 'num_workers': 8,
 'number_of_molecules': 1,
 'pytorch_seed': 0,
 'quiet': True,
 'save_dir': '/home/saxelrod/synthetic/confs_ffn_fixed/train',
 'save_preds': False,
 'save_smiles_splits': False,
 'seed': 0,
 'separate_test_features_path': None,
 'separate_test_path': '/home/saxelrod/synthetic/confs/test_full.csv',
 'separate_val_features_path': None,
 'separate_val_path': '/home/saxelrod/synthetic/confs/val_full.csv',
 'show_individual_scores': False,
 'smiles_columns': [None],
 'split_sizes': (0.8, 0.1, 0.1),
 'split_type': 'random',
 'target_columns': None,
 'task_names': ['log_uniqueconfs'],
 'test': False,
 'test_fold_index': None,
 'train_data_size': None,
 'undirected': False,
 'use_input_features': True,
 'val_fold_index': None,
 'warmup_epochs': 2.0}
Loading data
Number of tasks = 1
Fold 0
Splitting data with seed 0
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 9.1390e-01, PNorm = 62.2434, GNorm = 2.1415, lr_0 = 1.0413e-04
Loss = 7.5343e-01, PNorm = 62.2545, GNorm = 2.7777, lr_0 = 1.0788e-04
Loss = 5.9140e-01, PNorm = 62.2673, GNorm = 2.6170, lr_0 = 1.1163e-04
Loss = 5.0750e-01, PNorm = 62.2792, GNorm = 2.0865, lr_0 = 1.1537e-04
Loss = 4.5492e-01, PNorm = 62.2899, GNorm = 2.1501, lr_0 = 1.1913e-04
Loss = 4.3827e-01, PNorm = 62.2992, GNorm = 1.6847, lr_0 = 1.2287e-04
Loss = 4.2005e-01, PNorm = 62.3073, GNorm = 1.7029, lr_0 = 1.2663e-04
Loss = 3.1725e-01, PNorm = 62.3151, GNorm = 1.5762, lr_0 = 1.3038e-04
Loss = 4.0037e-01, PNorm = 62.3242, GNorm = 2.9363, lr_0 = 1.3413e-04
Loss = 3.3037e-01, PNorm = 62.3334, GNorm = 1.9172, lr_0 = 1.3788e-04
Loss = 3.9342e-01, PNorm = 62.3420, GNorm = 2.8788, lr_0 = 1.4163e-04
Loss = 3.5565e-01, PNorm = 62.3531, GNorm = 1.9758, lr_0 = 1.4537e-04
Loss = 3.8646e-01, PNorm = 62.3636, GNorm = 1.9454, lr_0 = 1.4913e-04
Loss = 4.0107e-01, PNorm = 62.3731, GNorm = 2.3860, lr_0 = 1.5288e-04
Loss = 3.1676e-01, PNorm = 62.3837, GNorm = 2.2988, lr_0 = 1.5662e-04
Loss = 3.3470e-01, PNorm = 62.3966, GNorm = 2.1376, lr_0 = 1.6038e-04
Loss = 3.2995e-01, PNorm = 62.4067, GNorm = 1.6054, lr_0 = 1.6412e-04
Loss = 3.4228e-01, PNorm = 62.4183, GNorm = 1.5450, lr_0 = 1.6788e-04
Loss = 2.9454e-01, PNorm = 62.4308, GNorm = 1.6554, lr_0 = 1.7163e-04
Loss = 3.0828e-01, PNorm = 62.4424, GNorm = 2.1265, lr_0 = 1.7538e-04
Loss = 3.1463e-01, PNorm = 62.4545, GNorm = 2.0852, lr_0 = 1.7913e-04
Loss = 3.0771e-01, PNorm = 62.4688, GNorm = 1.5885, lr_0 = 1.8288e-04
Loss = 3.1212e-01, PNorm = 62.4831, GNorm = 3.3169, lr_0 = 1.8662e-04
Loss = 3.1390e-01, PNorm = 62.4964, GNorm = 2.1123, lr_0 = 1.9038e-04
Loss = 3.1186e-01, PNorm = 62.5100, GNorm = 1.5852, lr_0 = 1.9413e-04
Loss = 2.7984e-01, PNorm = 62.5230, GNorm = 1.8819, lr_0 = 1.9788e-04
Loss = 3.2253e-01, PNorm = 62.5376, GNorm = 2.1986, lr_0 = 2.0163e-04
Loss = 3.0609e-01, PNorm = 62.5508, GNorm = 1.9286, lr_0 = 2.0537e-04
Loss = 2.8797e-01, PNorm = 62.5650, GNorm = 1.7759, lr_0 = 2.0913e-04
Loss = 3.0824e-01, PNorm = 62.5791, GNorm = 2.2471, lr_0 = 2.1288e-04
Loss = 2.6960e-01, PNorm = 62.5947, GNorm = 2.3774, lr_0 = 2.1663e-04
Loss = 2.6061e-01, PNorm = 62.6114, GNorm = 1.3580, lr_0 = 2.2038e-04
Loss = 3.0472e-01, PNorm = 62.6277, GNorm = 2.2230, lr_0 = 2.2412e-04
Loss = 2.7685e-01, PNorm = 62.6417, GNorm = 1.4343, lr_0 = 2.2787e-04
Loss = 2.7354e-01, PNorm = 62.6595, GNorm = 1.7901, lr_0 = 2.3163e-04
Loss = 2.7874e-01, PNorm = 62.6756, GNorm = 1.2364, lr_0 = 2.3538e-04
Loss = 2.7321e-01, PNorm = 62.6924, GNorm = 1.5603, lr_0 = 2.3913e-04
Loss = 2.6591e-01, PNorm = 62.7087, GNorm = 1.4065, lr_0 = 2.4288e-04
Loss = 2.7594e-01, PNorm = 62.7252, GNorm = 2.1474, lr_0 = 2.4662e-04
Loss = 2.5570e-01, PNorm = 62.7466, GNorm = 1.7034, lr_0 = 2.5038e-04
Loss = 2.5797e-01, PNorm = 62.7652, GNorm = 1.5764, lr_0 = 2.5413e-04
Loss = 2.6230e-01, PNorm = 62.7835, GNorm = 1.8107, lr_0 = 2.5788e-04
Loss = 2.6881e-01, PNorm = 62.8009, GNorm = 1.1568, lr_0 = 2.6163e-04
Loss = 2.4259e-01, PNorm = 62.8229, GNorm = 1.5109, lr_0 = 2.6537e-04
Loss = 2.2673e-01, PNorm = 62.8408, GNorm = 1.1260, lr_0 = 2.6912e-04
Loss = 2.5609e-01, PNorm = 62.8595, GNorm = 1.2189, lr_0 = 2.7288e-04
Loss = 3.2878e-01, PNorm = 62.8807, GNorm = 1.8497, lr_0 = 2.7663e-04
Loss = 3.0702e-01, PNorm = 62.9074, GNorm = 1.1305, lr_0 = 2.8038e-04
Loss = 2.7761e-01, PNorm = 62.9334, GNorm = 1.5013, lr_0 = 2.8413e-04
Loss = 2.3658e-01, PNorm = 62.9598, GNorm = 1.6893, lr_0 = 2.8787e-04
Loss = 2.4895e-01, PNorm = 62.9810, GNorm = 1.4866, lr_0 = 2.9163e-04
Loss = 2.3181e-01, PNorm = 63.0047, GNorm = 1.8184, lr_0 = 2.9538e-04
Loss = 2.8217e-01, PNorm = 63.0254, GNorm = 1.7689, lr_0 = 2.9913e-04
Loss = 3.0167e-01, PNorm = 63.0527, GNorm = 1.2209, lr_0 = 3.0288e-04
Loss = 2.7159e-01, PNorm = 63.0769, GNorm = 1.3854, lr_0 = 3.0662e-04
Loss = 2.5030e-01, PNorm = 63.0992, GNorm = 2.0480, lr_0 = 3.1037e-04
Loss = 2.7826e-01, PNorm = 63.1299, GNorm = 1.2572, lr_0 = 3.1413e-04
Loss = 2.7336e-01, PNorm = 63.1544, GNorm = 1.4131, lr_0 = 3.1788e-04
Loss = 3.0349e-01, PNorm = 63.1792, GNorm = 1.5573, lr_0 = 3.2163e-04
Loss = 2.6003e-01, PNorm = 63.2102, GNorm = 1.4264, lr_0 = 3.2538e-04
Loss = 2.6941e-01, PNorm = 63.2375, GNorm = 1.1926, lr_0 = 3.2912e-04
Loss = 2.7741e-01, PNorm = 63.2676, GNorm = 1.4063, lr_0 = 3.3288e-04
Loss = 2.1368e-01, PNorm = 63.2965, GNorm = 1.4054, lr_0 = 3.3663e-04
Loss = 2.5812e-01, PNorm = 63.3237, GNorm = 1.2553, lr_0 = 3.4038e-04
Loss = 2.4734e-01, PNorm = 63.3501, GNorm = 1.5178, lr_0 = 3.4413e-04
Loss = 2.4825e-01, PNorm = 63.3779, GNorm = 1.0374, lr_0 = 3.4787e-04
Loss = 2.3977e-01, PNorm = 63.4087, GNorm = 1.1351, lr_0 = 3.5162e-04
Loss = 2.3765e-01, PNorm = 63.4363, GNorm = 1.0357, lr_0 = 3.5538e-04
Loss = 2.5626e-01, PNorm = 63.4645, GNorm = 1.6403, lr_0 = 3.5913e-04
Loss = 2.6080e-01, PNorm = 63.4903, GNorm = 1.6112, lr_0 = 3.6288e-04
Loss = 2.3435e-01, PNorm = 63.5248, GNorm = 1.0634, lr_0 = 3.6662e-04
Loss = 2.3224e-01, PNorm = 63.5555, GNorm = 1.0538, lr_0 = 3.7037e-04
Loss = 2.4500e-01, PNorm = 63.5876, GNorm = 1.5898, lr_0 = 3.7413e-04
Loss = 2.4758e-01, PNorm = 63.6192, GNorm = 1.1806, lr_0 = 3.7788e-04
Loss = 2.3395e-01, PNorm = 63.6488, GNorm = 1.3432, lr_0 = 3.8163e-04
Loss = 2.2009e-01, PNorm = 63.6780, GNorm = 1.4732, lr_0 = 3.8537e-04
Loss = 2.3592e-01, PNorm = 63.7088, GNorm = 1.3862, lr_0 = 3.8912e-04
Loss = 2.5549e-01, PNorm = 63.7457, GNorm = 0.9946, lr_0 = 3.9287e-04
Loss = 2.3356e-01, PNorm = 63.7833, GNorm = 1.3236, lr_0 = 3.9663e-04
Loss = 2.4335e-01, PNorm = 63.8202, GNorm = 0.8003, lr_0 = 4.0038e-04
Loss = 2.1127e-01, PNorm = 63.8560, GNorm = 0.9640, lr_0 = 4.0413e-04
Loss = 2.4295e-01, PNorm = 63.8915, GNorm = 1.5173, lr_0 = 4.0787e-04
Loss = 2.3900e-01, PNorm = 63.9268, GNorm = 1.8141, lr_0 = 4.1162e-04
Loss = 1.9969e-01, PNorm = 63.9627, GNorm = 0.9935, lr_0 = 4.1537e-04
Loss = 2.3301e-01, PNorm = 63.9969, GNorm = 1.0855, lr_0 = 4.1913e-04
Loss = 2.4852e-01, PNorm = 64.0391, GNorm = 0.7905, lr_0 = 4.2288e-04
Loss = 2.2427e-01, PNorm = 64.0773, GNorm = 1.6897, lr_0 = 4.2662e-04
Loss = 2.6092e-01, PNorm = 64.1108, GNorm = 1.5742, lr_0 = 4.3037e-04
Loss = 2.0930e-01, PNorm = 64.1503, GNorm = 1.1607, lr_0 = 4.3412e-04
Loss = 2.3002e-01, PNorm = 64.1880, GNorm = 1.2294, lr_0 = 4.3788e-04
Loss = 2.2528e-01, PNorm = 64.2274, GNorm = 0.9173, lr_0 = 4.4163e-04
Loss = 2.2090e-01, PNorm = 64.2677, GNorm = 1.2119, lr_0 = 4.4538e-04
Loss = 2.3884e-01, PNorm = 64.3052, GNorm = 1.2722, lr_0 = 4.4912e-04
Loss = 2.2773e-01, PNorm = 64.3464, GNorm = 1.3099, lr_0 = 4.5287e-04
Loss = 2.5322e-01, PNorm = 64.3919, GNorm = 1.2440, lr_0 = 4.5662e-04
Loss = 2.3081e-01, PNorm = 64.4385, GNorm = 1.2261, lr_0 = 4.6038e-04
Loss = 2.1572e-01, PNorm = 64.4836, GNorm = 0.6456, lr_0 = 4.6413e-04
Loss = 2.2330e-01, PNorm = 64.5277, GNorm = 1.0335, lr_0 = 4.6787e-04
Loss = 2.2205e-01, PNorm = 64.5703, GNorm = 1.0886, lr_0 = 4.7162e-04
Loss = 2.3005e-01, PNorm = 64.6133, GNorm = 0.8799, lr_0 = 4.7537e-04
Loss = 2.1367e-01, PNorm = 64.6626, GNorm = 1.1286, lr_0 = 4.7913e-04
Loss = 2.1176e-01, PNorm = 64.7107, GNorm = 1.1451, lr_0 = 4.8288e-04
Loss = 2.2465e-01, PNorm = 64.7486, GNorm = 1.0500, lr_0 = 4.8663e-04
Loss = 2.2635e-01, PNorm = 64.8050, GNorm = 1.1441, lr_0 = 4.9038e-04
Loss = 1.9823e-01, PNorm = 64.8509, GNorm = 1.1632, lr_0 = 4.9412e-04
Loss = 2.0793e-01, PNorm = 64.9010, GNorm = 1.2654, lr_0 = 4.9788e-04
Loss = 2.7653e-01, PNorm = 64.9403, GNorm = 1.1131, lr_0 = 5.0163e-04
Loss = 2.3102e-01, PNorm = 65.0015, GNorm = 1.6273, lr_0 = 5.0538e-04
Loss = 2.4038e-01, PNorm = 65.0610, GNorm = 0.8759, lr_0 = 5.0913e-04
Loss = 2.0808e-01, PNorm = 65.1178, GNorm = 0.8726, lr_0 = 5.1287e-04
Loss = 2.3217e-01, PNorm = 65.1740, GNorm = 1.2907, lr_0 = 5.1663e-04
Loss = 1.8110e-01, PNorm = 65.2284, GNorm = 0.6723, lr_0 = 5.2038e-04
Loss = 2.0649e-01, PNorm = 65.2745, GNorm = 0.9974, lr_0 = 5.2413e-04
Loss = 2.3123e-01, PNorm = 65.3198, GNorm = 0.8897, lr_0 = 5.2788e-04
Loss = 2.1828e-01, PNorm = 65.3730, GNorm = 0.8198, lr_0 = 5.3162e-04
Loss = 2.2233e-01, PNorm = 65.4205, GNorm = 0.9024, lr_0 = 5.3538e-04
Loss = 2.1634e-01, PNorm = 65.4697, GNorm = 1.1594, lr_0 = 5.3912e-04
Loss = 2.3502e-01, PNorm = 65.5190, GNorm = 1.0004, lr_0 = 5.4288e-04
Loss = 2.2360e-01, PNorm = 65.5630, GNorm = 0.9095, lr_0 = 5.4663e-04
Loss = 2.0142e-01, PNorm = 65.6286, GNorm = 1.1573, lr_0 = 5.5038e-04
Validation mae = 0.567120
Epoch 1
Loss = 1.3836e-01, PNorm = 65.6709, GNorm = 0.6152, lr_0 = 5.5413e-04
Loss = 1.4970e-01, PNorm = 65.7219, GNorm = 0.7144, lr_0 = 5.5787e-04
Loss = 1.6054e-01, PNorm = 65.7648, GNorm = 0.7727, lr_0 = 5.6163e-04
Loss = 1.3429e-01, PNorm = 65.8118, GNorm = 0.7195, lr_0 = 5.6538e-04
Loss = 1.5896e-01, PNorm = 65.8659, GNorm = 0.8662, lr_0 = 5.6913e-04
Loss = 1.3400e-01, PNorm = 65.9139, GNorm = 0.6234, lr_0 = 5.7288e-04
Loss = 1.3534e-01, PNorm = 65.9616, GNorm = 0.9236, lr_0 = 5.7662e-04
Loss = 1.4306e-01, PNorm = 66.0063, GNorm = 0.8549, lr_0 = 5.8038e-04
Loss = 1.3387e-01, PNorm = 66.0604, GNorm = 1.0101, lr_0 = 5.8413e-04
Loss = 1.6666e-01, PNorm = 66.1183, GNorm = 0.7795, lr_0 = 5.8788e-04
Loss = 1.3301e-01, PNorm = 66.1780, GNorm = 1.3605, lr_0 = 5.9163e-04
Loss = 1.4761e-01, PNorm = 66.2412, GNorm = 0.7439, lr_0 = 5.9538e-04
Loss = 1.5614e-01, PNorm = 66.3019, GNorm = 0.5689, lr_0 = 5.9913e-04
Loss = 1.4986e-01, PNorm = 66.3637, GNorm = 0.9329, lr_0 = 6.0288e-04
Loss = 1.3803e-01, PNorm = 66.4201, GNorm = 0.7726, lr_0 = 6.0663e-04
Loss = 1.4953e-01, PNorm = 66.4824, GNorm = 0.7105, lr_0 = 6.1038e-04
Loss = 1.5119e-01, PNorm = 66.5424, GNorm = 0.7872, lr_0 = 6.1413e-04
Loss = 1.6190e-01, PNorm = 66.6074, GNorm = 1.0473, lr_0 = 6.1788e-04
Loss = 1.1310e-01, PNorm = 66.6728, GNorm = 0.6473, lr_0 = 6.2163e-04
Loss = 1.3227e-01, PNorm = 66.7262, GNorm = 1.2548, lr_0 = 6.2538e-04
Loss = 1.6778e-01, PNorm = 66.7817, GNorm = 1.1639, lr_0 = 6.2913e-04
Loss = 1.4831e-01, PNorm = 66.8624, GNorm = 0.8291, lr_0 = 6.3288e-04
Loss = 1.6452e-01, PNorm = 66.9290, GNorm = 1.1963, lr_0 = 6.3663e-04
Loss = 1.6394e-01, PNorm = 67.0062, GNorm = 0.8663, lr_0 = 6.4038e-04
Loss = 1.3466e-01, PNorm = 67.0730, GNorm = 0.7203, lr_0 = 6.4413e-04
Loss = 1.5911e-01, PNorm = 67.1459, GNorm = 0.9184, lr_0 = 6.4788e-04
Loss = 1.4658e-01, PNorm = 67.2230, GNorm = 1.0337, lr_0 = 6.5163e-04
Loss = 1.6387e-01, PNorm = 67.2938, GNorm = 0.7654, lr_0 = 6.5538e-04
Loss = 1.5895e-01, PNorm = 67.3817, GNorm = 0.6051, lr_0 = 6.5913e-04
Loss = 1.4091e-01, PNorm = 67.4575, GNorm = 0.8973, lr_0 = 6.6288e-04
Loss = 1.7929e-01, PNorm = 67.5326, GNorm = 0.7866, lr_0 = 6.6663e-04
Loss = 1.6741e-01, PNorm = 67.6215, GNorm = 1.4977, lr_0 = 6.7038e-04
Loss = 1.7610e-01, PNorm = 67.7187, GNorm = 0.9214, lr_0 = 6.7413e-04
Loss = 1.4504e-01, PNorm = 67.8153, GNorm = 0.7799, lr_0 = 6.7788e-04
Loss = 1.7598e-01, PNorm = 67.8990, GNorm = 1.0994, lr_0 = 6.8163e-04
Loss = 1.5602e-01, PNorm = 67.9997, GNorm = 1.2309, lr_0 = 6.8538e-04
Loss = 2.1301e-01, PNorm = 68.0794, GNorm = 1.2935, lr_0 = 6.8913e-04
Loss = 1.9101e-01, PNorm = 68.1833, GNorm = 0.9733, lr_0 = 6.9288e-04
Loss = 1.5774e-01, PNorm = 68.2825, GNorm = 0.7339, lr_0 = 6.9663e-04
Loss = 1.5566e-01, PNorm = 68.3679, GNorm = 0.9057, lr_0 = 7.0038e-04
Loss = 1.4396e-01, PNorm = 68.4524, GNorm = 1.7066, lr_0 = 7.0413e-04
Loss = 1.8933e-01, PNorm = 68.5410, GNorm = 0.8087, lr_0 = 7.0788e-04
Loss = 1.8522e-01, PNorm = 68.6343, GNorm = 0.8283, lr_0 = 7.1163e-04
Loss = 1.6418e-01, PNorm = 68.7238, GNorm = 0.9071, lr_0 = 7.1538e-04
Loss = 1.6844e-01, PNorm = 68.8117, GNorm = 0.8117, lr_0 = 7.1913e-04
Loss = 1.5964e-01, PNorm = 68.8939, GNorm = 1.2254, lr_0 = 7.2288e-04
Loss = 1.6302e-01, PNorm = 68.9821, GNorm = 0.9078, lr_0 = 7.2663e-04
Loss = 1.5541e-01, PNorm = 69.0664, GNorm = 0.7845, lr_0 = 7.3038e-04
Loss = 1.6534e-01, PNorm = 69.1582, GNorm = 0.6793, lr_0 = 7.3413e-04
Loss = 1.6644e-01, PNorm = 69.2444, GNorm = 0.7008, lr_0 = 7.3788e-04
Loss = 1.8421e-01, PNorm = 69.3334, GNorm = 1.1579, lr_0 = 7.4163e-04
Loss = 1.7385e-01, PNorm = 69.4259, GNorm = 0.8266, lr_0 = 7.4538e-04
Loss = 1.5769e-01, PNorm = 69.5248, GNorm = 0.7603, lr_0 = 7.4913e-04
Loss = 1.4897e-01, PNorm = 69.6225, GNorm = 0.6780, lr_0 = 7.5288e-04
Loss = 1.4574e-01, PNorm = 69.7193, GNorm = 0.6108, lr_0 = 7.5663e-04
Loss = 1.8186e-01, PNorm = 69.8176, GNorm = 1.6303, lr_0 = 7.6038e-04
Loss = 1.5353e-01, PNorm = 69.9151, GNorm = 0.7653, lr_0 = 7.6413e-04
Loss = 1.8237e-01, PNorm = 70.0163, GNorm = 0.9180, lr_0 = 7.6788e-04
Loss = 1.6149e-01, PNorm = 70.1233, GNorm = 0.6852, lr_0 = 7.7163e-04
Loss = 1.8421e-01, PNorm = 70.2127, GNorm = 1.0020, lr_0 = 7.7538e-04
Loss = 1.9263e-01, PNorm = 70.3220, GNorm = 0.8757, lr_0 = 7.7913e-04
Loss = 1.7106e-01, PNorm = 70.4398, GNorm = 0.9108, lr_0 = 7.8288e-04
Loss = 1.7752e-01, PNorm = 70.5514, GNorm = 0.7783, lr_0 = 7.8663e-04
Loss = 1.5393e-01, PNorm = 70.6525, GNorm = 0.7664, lr_0 = 7.9038e-04
Loss = 1.4852e-01, PNorm = 70.7666, GNorm = 0.7240, lr_0 = 7.9413e-04
Loss = 1.5352e-01, PNorm = 70.8700, GNorm = 0.9026, lr_0 = 7.9788e-04
Loss = 1.6186e-01, PNorm = 70.9747, GNorm = 0.7829, lr_0 = 8.0163e-04
Loss = 1.6517e-01, PNorm = 71.0756, GNorm = 0.9514, lr_0 = 8.0538e-04
Loss = 1.9291e-01, PNorm = 71.1859, GNorm = 0.7694, lr_0 = 8.0913e-04
Loss = 1.8505e-01, PNorm = 71.3093, GNorm = 0.7840, lr_0 = 8.1288e-04
Loss = 1.8987e-01, PNorm = 71.4290, GNorm = 0.6804, lr_0 = 8.1663e-04
Loss = 1.7686e-01, PNorm = 71.5581, GNorm = 1.0183, lr_0 = 8.2038e-04
Loss = 1.7701e-01, PNorm = 71.6700, GNorm = 0.8617, lr_0 = 8.2413e-04
Loss = 1.8110e-01, PNorm = 71.7927, GNorm = 0.9168, lr_0 = 8.2788e-04
Loss = 1.7901e-01, PNorm = 71.9104, GNorm = 0.7503, lr_0 = 8.3163e-04
Loss = 1.7025e-01, PNorm = 72.0273, GNorm = 0.7346, lr_0 = 8.3538e-04
Loss = 1.8146e-01, PNorm = 72.1351, GNorm = 0.7713, lr_0 = 8.3913e-04
Loss = 1.7802e-01, PNorm = 72.2474, GNorm = 0.7576, lr_0 = 8.4288e-04
Loss = 1.6570e-01, PNorm = 72.3529, GNorm = 0.8308, lr_0 = 8.4663e-04
Loss = 1.6870e-01, PNorm = 72.4664, GNorm = 0.7403, lr_0 = 8.5038e-04
Loss = 1.6671e-01, PNorm = 72.5890, GNorm = 0.7105, lr_0 = 8.5413e-04
Loss = 1.6487e-01, PNorm = 72.6973, GNorm = 0.5596, lr_0 = 8.5788e-04
Loss = 1.8636e-01, PNorm = 72.8242, GNorm = 0.7454, lr_0 = 8.6163e-04
Loss = 1.5785e-01, PNorm = 72.9425, GNorm = 0.8845, lr_0 = 8.6538e-04
Loss = 2.0874e-01, PNorm = 73.0672, GNorm = 1.1685, lr_0 = 8.6913e-04
Loss = 1.6825e-01, PNorm = 73.1874, GNorm = 0.6362, lr_0 = 8.7288e-04
Loss = 1.8719e-01, PNorm = 73.3104, GNorm = 0.7816, lr_0 = 8.7663e-04
Loss = 1.3946e-01, PNorm = 73.4244, GNorm = 0.8205, lr_0 = 8.8038e-04
Loss = 1.7085e-01, PNorm = 73.5442, GNorm = 0.5774, lr_0 = 8.8413e-04
Loss = 1.7296e-01, PNorm = 73.6608, GNorm = 1.2604, lr_0 = 8.8788e-04
Loss = 1.8425e-01, PNorm = 73.7812, GNorm = 0.7702, lr_0 = 8.9163e-04
Loss = 1.5651e-01, PNorm = 73.9092, GNorm = 0.8330, lr_0 = 8.9538e-04
Loss = 1.6835e-01, PNorm = 74.0245, GNorm = 0.5451, lr_0 = 8.9913e-04
Loss = 1.7124e-01, PNorm = 74.1437, GNorm = 1.0980, lr_0 = 9.0288e-04
Loss = 1.6797e-01, PNorm = 74.2712, GNorm = 0.7641, lr_0 = 9.0663e-04
Loss = 1.9451e-01, PNorm = 74.3999, GNorm = 0.7387, lr_0 = 9.1038e-04
Loss = 1.6263e-01, PNorm = 74.5253, GNorm = 0.7066, lr_0 = 9.1413e-04
Loss = 1.7928e-01, PNorm = 74.6497, GNorm = 0.6594, lr_0 = 9.1788e-04
Loss = 1.9049e-01, PNorm = 74.7879, GNorm = 0.8704, lr_0 = 9.2163e-04
Loss = 1.7420e-01, PNorm = 74.9248, GNorm = 0.9879, lr_0 = 9.2538e-04
Loss = 1.8732e-01, PNorm = 75.0714, GNorm = 0.5723, lr_0 = 9.2913e-04
Loss = 1.5607e-01, PNorm = 75.1959, GNorm = 0.6692, lr_0 = 9.3288e-04
Loss = 1.9392e-01, PNorm = 75.3156, GNorm = 0.6000, lr_0 = 9.3663e-04
Loss = 1.8671e-01, PNorm = 75.4500, GNorm = 0.6178, lr_0 = 9.4038e-04
Loss = 1.3956e-01, PNorm = 75.5769, GNorm = 0.6011, lr_0 = 9.4413e-04
Loss = 1.8691e-01, PNorm = 75.6905, GNorm = 1.1155, lr_0 = 9.4788e-04
Loss = 1.7687e-01, PNorm = 75.8349, GNorm = 0.7418, lr_0 = 9.5163e-04
Loss = 1.8870e-01, PNorm = 75.9629, GNorm = 1.2253, lr_0 = 9.5538e-04
Loss = 1.7809e-01, PNorm = 76.1067, GNorm = 1.1247, lr_0 = 9.5913e-04
Loss = 1.6189e-01, PNorm = 76.2369, GNorm = 0.7444, lr_0 = 9.6288e-04
Loss = 1.9977e-01, PNorm = 76.3531, GNorm = 1.4307, lr_0 = 9.6663e-04
Loss = 1.6319e-01, PNorm = 76.4761, GNorm = 0.6368, lr_0 = 9.7038e-04
Loss = 1.7612e-01, PNorm = 76.6010, GNorm = 0.7776, lr_0 = 9.7413e-04
Loss = 1.8068e-01, PNorm = 76.7234, GNorm = 1.0812, lr_0 = 9.7788e-04
Loss = 2.0270e-01, PNorm = 76.8608, GNorm = 0.7488, lr_0 = 9.8163e-04
Loss = 1.9633e-01, PNorm = 76.9972, GNorm = 1.2753, lr_0 = 9.8537e-04
Loss = 1.5807e-01, PNorm = 77.1252, GNorm = 0.8478, lr_0 = 9.8912e-04
Loss = 1.7634e-01, PNorm = 77.2544, GNorm = 0.9092, lr_0 = 9.9288e-04
Loss = 1.8009e-01, PNorm = 77.3681, GNorm = 1.3194, lr_0 = 9.9663e-04
Loss = 1.7236e-01, PNorm = 77.4986, GNorm = 0.7031, lr_0 = 9.9993e-04
Validation mae = 0.534243
Epoch 2
Loss = 1.1762e-01, PNorm = 77.6152, GNorm = 0.4397, lr_0 = 9.9925e-04
Loss = 9.8263e-02, PNorm = 77.7127, GNorm = 1.2441, lr_0 = 9.9856e-04
Loss = 1.1980e-01, PNorm = 77.7998, GNorm = 0.5456, lr_0 = 9.9788e-04
Loss = 1.3535e-01, PNorm = 77.8957, GNorm = 0.6516, lr_0 = 9.9719e-04
Loss = 1.1655e-01, PNorm = 78.0006, GNorm = 0.7552, lr_0 = 9.9651e-04
Loss = 1.0906e-01, PNorm = 78.1048, GNorm = 0.7100, lr_0 = 9.9583e-04
Loss = 1.0708e-01, PNorm = 78.2210, GNorm = 0.5376, lr_0 = 9.9515e-04
Loss = 1.1240e-01, PNorm = 78.3517, GNorm = 1.3794, lr_0 = 9.9446e-04
Loss = 1.2511e-01, PNorm = 78.4621, GNorm = 0.8445, lr_0 = 9.9378e-04
Loss = 1.2316e-01, PNorm = 78.5744, GNorm = 1.1183, lr_0 = 9.9310e-04
Loss = 1.2536e-01, PNorm = 78.7010, GNorm = 0.5433, lr_0 = 9.9242e-04
Loss = 1.2438e-01, PNorm = 78.8074, GNorm = 0.5255, lr_0 = 9.9174e-04
Loss = 1.0112e-01, PNorm = 78.9087, GNorm = 1.3619, lr_0 = 9.9106e-04
Loss = 8.9647e-02, PNorm = 78.9928, GNorm = 0.6318, lr_0 = 9.9038e-04
Loss = 1.2460e-01, PNorm = 79.0839, GNorm = 0.6171, lr_0 = 9.8971e-04
Loss = 1.1318e-01, PNorm = 79.1860, GNorm = 1.1852, lr_0 = 9.8903e-04
Loss = 1.0080e-01, PNorm = 79.2924, GNorm = 0.5617, lr_0 = 9.8835e-04
Loss = 1.1549e-01, PNorm = 79.3961, GNorm = 0.6284, lr_0 = 9.8767e-04
Loss = 1.1473e-01, PNorm = 79.4821, GNorm = 0.6432, lr_0 = 9.8700e-04
Loss = 1.1263e-01, PNorm = 79.5892, GNorm = 1.1403, lr_0 = 9.8632e-04
Loss = 9.3043e-02, PNorm = 79.6935, GNorm = 0.6086, lr_0 = 9.8564e-04
Loss = 1.0005e-01, PNorm = 79.7944, GNorm = 0.5581, lr_0 = 9.8497e-04
Loss = 1.0604e-01, PNorm = 79.8866, GNorm = 0.5500, lr_0 = 9.8429e-04
Loss = 9.7824e-02, PNorm = 79.9970, GNorm = 0.5652, lr_0 = 9.8362e-04
Loss = 1.0256e-01, PNorm = 80.0942, GNorm = 0.6156, lr_0 = 9.8295e-04
Loss = 1.1292e-01, PNorm = 80.1967, GNorm = 0.6392, lr_0 = 9.8227e-04
Loss = 1.0625e-01, PNorm = 80.2956, GNorm = 0.5806, lr_0 = 9.8160e-04
Loss = 1.1102e-01, PNorm = 80.4058, GNorm = 0.3485, lr_0 = 9.8093e-04
Loss = 9.6659e-02, PNorm = 80.5057, GNorm = 0.4944, lr_0 = 9.8026e-04
Loss = 1.0455e-01, PNorm = 80.6161, GNorm = 0.7250, lr_0 = 9.7958e-04
Loss = 9.8789e-02, PNorm = 80.7173, GNorm = 0.4742, lr_0 = 9.7891e-04
Loss = 9.6574e-02, PNorm = 80.8115, GNorm = 0.5438, lr_0 = 9.7824e-04
Loss = 9.7443e-02, PNorm = 80.8932, GNorm = 0.5308, lr_0 = 9.7757e-04
Loss = 9.5409e-02, PNorm = 80.9834, GNorm = 0.4129, lr_0 = 9.7690e-04
Loss = 1.1741e-01, PNorm = 81.0822, GNorm = 0.8123, lr_0 = 9.7623e-04
Loss = 9.7693e-02, PNorm = 81.1895, GNorm = 0.7474, lr_0 = 9.7556e-04
Loss = 1.0416e-01, PNorm = 81.2878, GNorm = 0.4929, lr_0 = 9.7490e-04
Loss = 1.1684e-01, PNorm = 81.3995, GNorm = 0.5143, lr_0 = 9.7423e-04
Loss = 1.1608e-01, PNorm = 81.5123, GNorm = 0.8518, lr_0 = 9.7356e-04
Loss = 1.0898e-01, PNorm = 81.6255, GNorm = 0.5235, lr_0 = 9.7289e-04
Loss = 1.2618e-01, PNorm = 81.7327, GNorm = 0.7071, lr_0 = 9.7223e-04
Loss = 1.0292e-01, PNorm = 81.8451, GNorm = 0.6192, lr_0 = 9.7156e-04
Loss = 1.0453e-01, PNorm = 81.9418, GNorm = 0.4219, lr_0 = 9.7090e-04
Loss = 1.0150e-01, PNorm = 82.0400, GNorm = 0.4449, lr_0 = 9.7023e-04
Loss = 1.2436e-01, PNorm = 82.1455, GNorm = 0.5782, lr_0 = 9.6957e-04
Loss = 1.0650e-01, PNorm = 82.2434, GNorm = 0.5094, lr_0 = 9.6890e-04
Loss = 1.3034e-01, PNorm = 82.3461, GNorm = 0.5719, lr_0 = 9.6824e-04
Loss = 1.0211e-01, PNorm = 82.4506, GNorm = 0.6362, lr_0 = 9.6757e-04
Loss = 1.1842e-01, PNorm = 82.5433, GNorm = 0.6649, lr_0 = 9.6691e-04
Loss = 1.2504e-01, PNorm = 82.6405, GNorm = 0.9769, lr_0 = 9.6625e-04
Loss = 1.0254e-01, PNorm = 82.7441, GNorm = 0.4755, lr_0 = 9.6559e-04
Loss = 1.1221e-01, PNorm = 82.8413, GNorm = 0.5136, lr_0 = 9.6493e-04
Loss = 1.1013e-01, PNorm = 82.9418, GNorm = 0.4853, lr_0 = 9.6427e-04
Loss = 9.1823e-02, PNorm = 83.0464, GNorm = 0.5302, lr_0 = 9.6360e-04
Loss = 9.6958e-02, PNorm = 83.1351, GNorm = 0.4336, lr_0 = 9.6294e-04
Loss = 1.0329e-01, PNorm = 83.2280, GNorm = 0.6150, lr_0 = 9.6228e-04
Loss = 1.1223e-01, PNorm = 83.3245, GNorm = 0.5361, lr_0 = 9.6163e-04
Loss = 1.1920e-01, PNorm = 83.4287, GNorm = 0.8831, lr_0 = 9.6097e-04
Loss = 1.1943e-01, PNorm = 83.5384, GNorm = 0.6431, lr_0 = 9.6031e-04
Loss = 1.2124e-01, PNorm = 83.6398, GNorm = 0.6880, lr_0 = 9.5965e-04
Loss = 1.0685e-01, PNorm = 83.7401, GNorm = 0.5164, lr_0 = 9.5899e-04
Loss = 1.2430e-01, PNorm = 83.8437, GNorm = 0.8060, lr_0 = 9.5834e-04
Loss = 9.6537e-02, PNorm = 83.9505, GNorm = 0.4863, lr_0 = 9.5768e-04
Loss = 1.2397e-01, PNorm = 84.0558, GNorm = 0.9749, lr_0 = 9.5702e-04
Loss = 1.0637e-01, PNorm = 84.1664, GNorm = 0.5499, lr_0 = 9.5637e-04
Loss = 1.0895e-01, PNorm = 84.2769, GNorm = 0.5365, lr_0 = 9.5571e-04
Loss = 9.4101e-02, PNorm = 84.3889, GNorm = 0.7711, lr_0 = 9.5506e-04
Loss = 1.0990e-01, PNorm = 84.4964, GNorm = 0.5004, lr_0 = 9.5440e-04
Loss = 9.8176e-02, PNorm = 84.6131, GNorm = 0.6792, lr_0 = 9.5375e-04
Loss = 1.0070e-01, PNorm = 84.7051, GNorm = 0.7239, lr_0 = 9.5310e-04
Loss = 1.0809e-01, PNorm = 84.8141, GNorm = 0.5882, lr_0 = 9.5244e-04
Loss = 1.2375e-01, PNorm = 84.9274, GNorm = 0.6470, lr_0 = 9.5179e-04
Loss = 1.3028e-01, PNorm = 85.0472, GNorm = 0.5983, lr_0 = 9.5114e-04
Loss = 1.1564e-01, PNorm = 85.1589, GNorm = 0.6003, lr_0 = 9.5049e-04
Loss = 1.2690e-01, PNorm = 85.2751, GNorm = 0.6765, lr_0 = 9.4984e-04
Loss = 1.1514e-01, PNorm = 85.3891, GNorm = 0.4836, lr_0 = 9.4919e-04
Loss = 1.2261e-01, PNorm = 85.4881, GNorm = 0.7731, lr_0 = 9.4854e-04
Loss = 1.0468e-01, PNorm = 85.6104, GNorm = 0.8784, lr_0 = 9.4789e-04
Loss = 1.1158e-01, PNorm = 85.7188, GNorm = 0.9914, lr_0 = 9.4724e-04
Loss = 1.1440e-01, PNorm = 85.8332, GNorm = 0.9142, lr_0 = 9.4659e-04
Loss = 1.2520e-01, PNorm = 85.9473, GNorm = 0.7687, lr_0 = 9.4594e-04
Loss = 1.2745e-01, PNorm = 86.0706, GNorm = 0.9881, lr_0 = 9.4529e-04
Loss = 1.2614e-01, PNorm = 86.1930, GNorm = 0.4462, lr_0 = 9.4464e-04
Loss = 1.2217e-01, PNorm = 86.3163, GNorm = 0.5701, lr_0 = 9.4400e-04
Loss = 1.2489e-01, PNorm = 86.4294, GNorm = 1.0274, lr_0 = 9.4335e-04
Loss = 1.2066e-01, PNorm = 86.5458, GNorm = 0.6168, lr_0 = 9.4270e-04
Loss = 1.1403e-01, PNorm = 86.6687, GNorm = 0.4775, lr_0 = 9.4206e-04
Loss = 1.1525e-01, PNorm = 86.7812, GNorm = 0.5589, lr_0 = 9.4141e-04
Loss = 1.3760e-01, PNorm = 86.8894, GNorm = 1.2793, lr_0 = 9.4077e-04
Loss = 1.1625e-01, PNorm = 87.0029, GNorm = 0.5653, lr_0 = 9.4012e-04
Loss = 1.0929e-01, PNorm = 87.1178, GNorm = 0.5983, lr_0 = 9.3948e-04
Loss = 1.0582e-01, PNorm = 87.2229, GNorm = 0.4645, lr_0 = 9.3884e-04
Loss = 1.0952e-01, PNorm = 87.3237, GNorm = 0.6082, lr_0 = 9.3819e-04
Loss = 1.1650e-01, PNorm = 87.4272, GNorm = 0.4537, lr_0 = 9.3755e-04
Loss = 1.1294e-01, PNorm = 87.5237, GNorm = 0.6370, lr_0 = 9.3691e-04
Loss = 1.0957e-01, PNorm = 87.6153, GNorm = 0.7716, lr_0 = 9.3627e-04
Loss = 1.2195e-01, PNorm = 87.7308, GNorm = 0.6084, lr_0 = 9.3562e-04
Loss = 1.1660e-01, PNorm = 87.8393, GNorm = 0.6047, lr_0 = 9.3498e-04
Loss = 1.1404e-01, PNorm = 87.9516, GNorm = 0.7210, lr_0 = 9.3434e-04
Loss = 1.1403e-01, PNorm = 88.0549, GNorm = 0.8648, lr_0 = 9.3370e-04
Loss = 1.1369e-01, PNorm = 88.1676, GNorm = 0.4276, lr_0 = 9.3306e-04
Loss = 1.0846e-01, PNorm = 88.2750, GNorm = 0.5149, lr_0 = 9.3242e-04
Loss = 1.0860e-01, PNorm = 88.3770, GNorm = 0.5871, lr_0 = 9.3178e-04
Loss = 1.2016e-01, PNorm = 88.4882, GNorm = 0.6009, lr_0 = 9.3115e-04
Loss = 1.1454e-01, PNorm = 88.5997, GNorm = 0.4757, lr_0 = 9.3051e-04
Loss = 1.2280e-01, PNorm = 88.7099, GNorm = 0.5439, lr_0 = 9.2987e-04
Loss = 1.0988e-01, PNorm = 88.8259, GNorm = 0.5783, lr_0 = 9.2923e-04
Loss = 1.1844e-01, PNorm = 88.9301, GNorm = 0.6959, lr_0 = 9.2860e-04
Loss = 1.1316e-01, PNorm = 89.0266, GNorm = 0.5913, lr_0 = 9.2796e-04
Loss = 1.0097e-01, PNorm = 89.1206, GNorm = 0.7351, lr_0 = 9.2733e-04
Loss = 1.2454e-01, PNorm = 89.2206, GNorm = 0.9624, lr_0 = 9.2669e-04
Loss = 1.1596e-01, PNorm = 89.3246, GNorm = 0.9947, lr_0 = 9.2606e-04
Loss = 1.2546e-01, PNorm = 89.4242, GNorm = 0.5665, lr_0 = 9.2542e-04
Loss = 1.1707e-01, PNorm = 89.5369, GNorm = 1.0937, lr_0 = 9.2479e-04
Loss = 1.2252e-01, PNorm = 89.6339, GNorm = 0.7477, lr_0 = 9.2415e-04
Loss = 1.1564e-01, PNorm = 89.7306, GNorm = 0.5685, lr_0 = 9.2352e-04
Loss = 1.1434e-01, PNorm = 89.8372, GNorm = 0.4675, lr_0 = 9.2289e-04
Loss = 1.0836e-01, PNorm = 89.9262, GNorm = 1.1902, lr_0 = 9.2226e-04
Loss = 1.0560e-01, PNorm = 90.0238, GNorm = 0.6036, lr_0 = 9.2162e-04
Loss = 1.2653e-01, PNorm = 90.1285, GNorm = 0.7175, lr_0 = 9.2099e-04
Validation mae = 0.509211
Epoch 3
Loss = 7.2701e-02, PNorm = 90.2220, GNorm = 0.4693, lr_0 = 9.2036e-04
Loss = 8.3532e-02, PNorm = 90.3002, GNorm = 0.6227, lr_0 = 9.1973e-04
Loss = 6.4426e-02, PNorm = 90.3641, GNorm = 0.9092, lr_0 = 9.1910e-04
Loss = 6.3668e-02, PNorm = 90.4291, GNorm = 0.3297, lr_0 = 9.1847e-04
Loss = 6.8983e-02, PNorm = 90.4897, GNorm = 0.9494, lr_0 = 9.1784e-04
Loss = 6.1150e-02, PNorm = 90.5546, GNorm = 0.3069, lr_0 = 9.1721e-04
Loss = 6.9230e-02, PNorm = 90.6104, GNorm = 0.4063, lr_0 = 9.1658e-04
Loss = 6.1078e-02, PNorm = 90.6770, GNorm = 0.3419, lr_0 = 9.1596e-04
Loss = 6.6124e-02, PNorm = 90.7303, GNorm = 0.5784, lr_0 = 9.1533e-04
Loss = 6.6635e-02, PNorm = 90.7918, GNorm = 0.7536, lr_0 = 9.1470e-04
Loss = 6.5784e-02, PNorm = 90.8551, GNorm = 0.3211, lr_0 = 9.1408e-04
Loss = 6.7741e-02, PNorm = 90.9176, GNorm = 0.4640, lr_0 = 9.1345e-04
Loss = 6.3754e-02, PNorm = 90.9794, GNorm = 0.5386, lr_0 = 9.1282e-04
Loss = 6.6212e-02, PNorm = 91.0325, GNorm = 0.3873, lr_0 = 9.1220e-04
Loss = 6.1586e-02, PNorm = 91.1018, GNorm = 0.9192, lr_0 = 9.1157e-04
Loss = 6.6598e-02, PNorm = 91.1636, GNorm = 0.5382, lr_0 = 9.1095e-04
Loss = 6.7939e-02, PNorm = 91.2410, GNorm = 0.2976, lr_0 = 9.1032e-04
Loss = 6.3269e-02, PNorm = 91.3124, GNorm = 0.4901, lr_0 = 9.0970e-04
Loss = 6.7058e-02, PNorm = 91.3857, GNorm = 0.5056, lr_0 = 9.0908e-04
Loss = 6.7556e-02, PNorm = 91.4484, GNorm = 1.2244, lr_0 = 9.0846e-04
Loss = 5.8294e-02, PNorm = 91.5173, GNorm = 0.6636, lr_0 = 9.0783e-04
Loss = 6.4373e-02, PNorm = 91.5712, GNorm = 0.4307, lr_0 = 9.0721e-04
Loss = 8.3240e-02, PNorm = 91.6456, GNorm = 1.7466, lr_0 = 9.0659e-04
Loss = 7.2732e-02, PNorm = 91.7180, GNorm = 0.4709, lr_0 = 9.0597e-04
Loss = 7.3081e-02, PNorm = 91.7956, GNorm = 0.6533, lr_0 = 9.0535e-04
Loss = 6.6068e-02, PNorm = 91.8714, GNorm = 0.2956, lr_0 = 9.0473e-04
Loss = 6.2467e-02, PNorm = 91.9370, GNorm = 0.5232, lr_0 = 9.0411e-04
Loss = 7.3328e-02, PNorm = 92.0018, GNorm = 0.4729, lr_0 = 9.0349e-04
Loss = 6.5987e-02, PNorm = 92.0726, GNorm = 0.6628, lr_0 = 9.0287e-04
Loss = 6.5711e-02, PNorm = 92.1351, GNorm = 0.5190, lr_0 = 9.0225e-04
Loss = 7.5470e-02, PNorm = 92.2155, GNorm = 0.5339, lr_0 = 9.0163e-04
Loss = 6.4842e-02, PNorm = 92.2825, GNorm = 0.5506, lr_0 = 9.0102e-04
Loss = 7.4169e-02, PNorm = 92.3567, GNorm = 0.5406, lr_0 = 9.0040e-04
Loss = 6.6247e-02, PNorm = 92.4215, GNorm = 0.5747, lr_0 = 8.9978e-04
Loss = 6.5628e-02, PNorm = 92.4947, GNorm = 0.5670, lr_0 = 8.9916e-04
Loss = 5.6919e-02, PNorm = 92.5696, GNorm = 0.8788, lr_0 = 8.9855e-04
Loss = 7.7538e-02, PNorm = 92.6379, GNorm = 0.8765, lr_0 = 8.9793e-04
Loss = 7.3491e-02, PNorm = 92.7131, GNorm = 0.5335, lr_0 = 8.9732e-04
Loss = 7.4297e-02, PNorm = 92.7913, GNorm = 0.4036, lr_0 = 8.9670e-04
Loss = 6.8873e-02, PNorm = 92.8562, GNorm = 0.8085, lr_0 = 8.9609e-04
Loss = 6.6993e-02, PNorm = 92.9307, GNorm = 0.5755, lr_0 = 8.9548e-04
Loss = 6.6559e-02, PNorm = 93.0071, GNorm = 0.7521, lr_0 = 8.9486e-04
Loss = 6.3122e-02, PNorm = 93.0875, GNorm = 0.3485, lr_0 = 8.9425e-04
Loss = 6.3947e-02, PNorm = 93.1622, GNorm = 0.4234, lr_0 = 8.9364e-04
Loss = 6.7662e-02, PNorm = 93.2345, GNorm = 0.4974, lr_0 = 8.9302e-04
Loss = 5.9515e-02, PNorm = 93.3148, GNorm = 0.4401, lr_0 = 8.9241e-04
Loss = 7.8499e-02, PNorm = 93.3802, GNorm = 0.7669, lr_0 = 8.9180e-04
Loss = 6.6522e-02, PNorm = 93.4584, GNorm = 0.4512, lr_0 = 8.9119e-04
Loss = 7.0144e-02, PNorm = 93.5332, GNorm = 0.7147, lr_0 = 8.9058e-04
Loss = 6.4594e-02, PNorm = 93.6091, GNorm = 0.3654, lr_0 = 8.8997e-04
Loss = 7.3007e-02, PNorm = 93.6937, GNorm = 0.3696, lr_0 = 8.8936e-04
Loss = 6.8340e-02, PNorm = 93.7820, GNorm = 0.4560, lr_0 = 8.8875e-04
Loss = 7.1966e-02, PNorm = 93.8600, GNorm = 0.7366, lr_0 = 8.8814e-04
Loss = 6.7735e-02, PNorm = 93.9306, GNorm = 0.3893, lr_0 = 8.8753e-04
Loss = 7.8201e-02, PNorm = 94.0048, GNorm = 0.4193, lr_0 = 8.8693e-04
Loss = 7.3134e-02, PNorm = 94.0836, GNorm = 0.6668, lr_0 = 8.8632e-04
Loss = 8.1866e-02, PNorm = 94.1619, GNorm = 0.5245, lr_0 = 8.8571e-04
Loss = 6.9945e-02, PNorm = 94.2444, GNorm = 0.4948, lr_0 = 8.8510e-04
Loss = 6.8088e-02, PNorm = 94.3174, GNorm = 1.1281, lr_0 = 8.8450e-04
Loss = 7.1986e-02, PNorm = 94.4073, GNorm = 0.4019, lr_0 = 8.8389e-04
Loss = 7.5852e-02, PNorm = 94.4805, GNorm = 0.4101, lr_0 = 8.8329e-04
Loss = 7.3593e-02, PNorm = 94.5647, GNorm = 0.3442, lr_0 = 8.8268e-04
Loss = 6.6815e-02, PNorm = 94.6493, GNorm = 0.3883, lr_0 = 8.8208e-04
Loss = 7.4373e-02, PNorm = 94.7368, GNorm = 0.7018, lr_0 = 8.8147e-04
Loss = 6.7581e-02, PNorm = 94.8180, GNorm = 0.4084, lr_0 = 8.8087e-04
Loss = 6.9447e-02, PNorm = 94.9030, GNorm = 0.5321, lr_0 = 8.8026e-04
Loss = 8.1069e-02, PNorm = 94.9827, GNorm = 0.4359, lr_0 = 8.7966e-04
Loss = 6.7252e-02, PNorm = 95.0738, GNorm = 0.8246, lr_0 = 8.7906e-04
Loss = 6.8738e-02, PNorm = 95.1549, GNorm = 0.3791, lr_0 = 8.7846e-04
Loss = 7.7355e-02, PNorm = 95.2462, GNorm = 0.3433, lr_0 = 8.7785e-04
Loss = 8.5500e-02, PNorm = 95.3316, GNorm = 0.4495, lr_0 = 8.7725e-04
Loss = 7.4463e-02, PNorm = 95.4185, GNorm = 0.4502, lr_0 = 8.7665e-04
Loss = 8.1894e-02, PNorm = 95.5041, GNorm = 0.4372, lr_0 = 8.7605e-04
Loss = 7.0943e-02, PNorm = 95.5912, GNorm = 0.9757, lr_0 = 8.7545e-04
Loss = 7.8489e-02, PNorm = 95.6824, GNorm = 1.0602, lr_0 = 8.7485e-04
Loss = 7.1765e-02, PNorm = 95.7591, GNorm = 0.4672, lr_0 = 8.7425e-04
Loss = 7.7756e-02, PNorm = 95.8548, GNorm = 0.4363, lr_0 = 8.7365e-04
Loss = 7.7739e-02, PNorm = 95.9528, GNorm = 0.5421, lr_0 = 8.7306e-04
Loss = 8.1945e-02, PNorm = 96.0400, GNorm = 0.6692, lr_0 = 8.7246e-04
Loss = 7.7898e-02, PNorm = 96.1442, GNorm = 0.7647, lr_0 = 8.7186e-04
Loss = 7.8456e-02, PNorm = 96.2317, GNorm = 0.5801, lr_0 = 8.7126e-04
Loss = 8.2011e-02, PNorm = 96.3272, GNorm = 0.7668, lr_0 = 8.7067e-04
Loss = 6.7335e-02, PNorm = 96.4218, GNorm = 0.3485, lr_0 = 8.7007e-04
Loss = 7.9278e-02, PNorm = 96.5118, GNorm = 0.6780, lr_0 = 8.6947e-04
Loss = 6.9633e-02, PNorm = 96.5950, GNorm = 0.4672, lr_0 = 8.6888e-04
Loss = 7.4807e-02, PNorm = 96.6771, GNorm = 0.5417, lr_0 = 8.6828e-04
Loss = 7.5999e-02, PNorm = 96.7730, GNorm = 1.2996, lr_0 = 8.6769e-04
Loss = 7.7319e-02, PNorm = 96.8471, GNorm = 0.7457, lr_0 = 8.6709e-04
Loss = 7.2044e-02, PNorm = 96.9369, GNorm = 0.9417, lr_0 = 8.6650e-04
Loss = 7.4747e-02, PNorm = 97.0252, GNorm = 0.8920, lr_0 = 8.6590e-04
Loss = 6.6912e-02, PNorm = 97.1094, GNorm = 1.0966, lr_0 = 8.6531e-04
Loss = 7.6684e-02, PNorm = 97.1830, GNorm = 0.4755, lr_0 = 8.6472e-04
Loss = 7.3483e-02, PNorm = 97.2671, GNorm = 0.7364, lr_0 = 8.6413e-04
Loss = 7.0567e-02, PNorm = 97.3501, GNorm = 0.7812, lr_0 = 8.6353e-04
Loss = 7.6598e-02, PNorm = 97.4368, GNorm = 0.4537, lr_0 = 8.6294e-04
Loss = 7.5056e-02, PNorm = 97.5274, GNorm = 0.3431, lr_0 = 8.6235e-04
Loss = 7.5482e-02, PNorm = 97.6143, GNorm = 0.8292, lr_0 = 8.6176e-04
Loss = 8.0586e-02, PNorm = 97.6985, GNorm = 0.3939, lr_0 = 8.6117e-04
Loss = 7.7067e-02, PNorm = 97.7947, GNorm = 0.4857, lr_0 = 8.6058e-04
Loss = 8.4442e-02, PNorm = 97.8849, GNorm = 0.5200, lr_0 = 8.5999e-04
Loss = 7.4032e-02, PNorm = 97.9871, GNorm = 0.3817, lr_0 = 8.5940e-04
Loss = 8.3752e-02, PNorm = 98.0797, GNorm = 0.6974, lr_0 = 8.5881e-04
Loss = 7.6594e-02, PNorm = 98.1792, GNorm = 0.3446, lr_0 = 8.5823e-04
Loss = 7.2368e-02, PNorm = 98.2707, GNorm = 0.4130, lr_0 = 8.5764e-04
Loss = 7.9125e-02, PNorm = 98.3696, GNorm = 0.5905, lr_0 = 8.5705e-04
Loss = 8.0212e-02, PNorm = 98.4794, GNorm = 0.5958, lr_0 = 8.5646e-04
Loss = 8.1195e-02, PNorm = 98.5761, GNorm = 0.6068, lr_0 = 8.5588e-04
Loss = 8.3135e-02, PNorm = 98.6878, GNorm = 0.5621, lr_0 = 8.5529e-04
Loss = 8.3488e-02, PNorm = 98.7834, GNorm = 0.6848, lr_0 = 8.5470e-04
Loss = 8.3226e-02, PNorm = 98.8925, GNorm = 0.8419, lr_0 = 8.5412e-04
Loss = 8.3279e-02, PNorm = 98.9926, GNorm = 0.6882, lr_0 = 8.5353e-04
Loss = 8.4112e-02, PNorm = 99.0922, GNorm = 0.7005, lr_0 = 8.5295e-04
Loss = 7.7667e-02, PNorm = 99.1839, GNorm = 1.2713, lr_0 = 8.5236e-04
Loss = 7.2885e-02, PNorm = 99.2683, GNorm = 0.4785, lr_0 = 8.5178e-04
Loss = 8.3325e-02, PNorm = 99.3593, GNorm = 0.8731, lr_0 = 8.5120e-04
Loss = 6.7115e-02, PNorm = 99.4432, GNorm = 0.6817, lr_0 = 8.5061e-04
Loss = 7.2023e-02, PNorm = 99.5334, GNorm = 0.7568, lr_0 = 8.5003e-04
Loss = 7.9141e-02, PNorm = 99.6257, GNorm = 0.5955, lr_0 = 8.4945e-04
Loss = 7.4941e-02, PNorm = 99.7157, GNorm = 0.4375, lr_0 = 8.4887e-04
Loss = 8.3828e-02, PNorm = 99.8171, GNorm = 0.4610, lr_0 = 8.4828e-04
Validation mae = 0.503959
Epoch 4
Loss = 5.4049e-02, PNorm = 99.8851, GNorm = 0.4493, lr_0 = 8.4770e-04
Loss = 5.6664e-02, PNorm = 99.9520, GNorm = 0.3564, lr_0 = 8.4712e-04
Loss = 5.1764e-02, PNorm = 100.0075, GNorm = 0.3446, lr_0 = 8.4654e-04
Loss = 5.4057e-02, PNorm = 100.0664, GNorm = 0.6062, lr_0 = 8.4596e-04
Loss = 5.2227e-02, PNorm = 100.1250, GNorm = 0.5557, lr_0 = 8.4538e-04
Loss = 4.4630e-02, PNorm = 100.1865, GNorm = 0.3640, lr_0 = 8.4480e-04
Loss = 4.6103e-02, PNorm = 100.2528, GNorm = 0.7146, lr_0 = 8.4423e-04
Loss = 4.8396e-02, PNorm = 100.3108, GNorm = 0.3258, lr_0 = 8.4365e-04
Loss = 4.4624e-02, PNorm = 100.3636, GNorm = 0.2751, lr_0 = 8.4307e-04
Loss = 4.4388e-02, PNorm = 100.4134, GNorm = 0.4937, lr_0 = 8.4249e-04
Loss = 4.2208e-02, PNorm = 100.4687, GNorm = 0.3016, lr_0 = 8.4191e-04
Loss = 4.8295e-02, PNorm = 100.5189, GNorm = 0.6309, lr_0 = 8.4134e-04
Loss = 5.2019e-02, PNorm = 100.5790, GNorm = 0.2705, lr_0 = 8.4076e-04
Loss = 4.9220e-02, PNorm = 100.6494, GNorm = 0.3083, lr_0 = 8.4019e-04
Loss = 4.9091e-02, PNorm = 100.7101, GNorm = 0.3809, lr_0 = 8.3961e-04
Loss = 4.7722e-02, PNorm = 100.7728, GNorm = 0.4085, lr_0 = 8.3903e-04
Loss = 4.0085e-02, PNorm = 100.8286, GNorm = 0.4934, lr_0 = 8.3846e-04
Loss = 5.0644e-02, PNorm = 100.8837, GNorm = 0.5599, lr_0 = 8.3789e-04
Loss = 4.5073e-02, PNorm = 100.9399, GNorm = 0.3824, lr_0 = 8.3731e-04
Loss = 4.3598e-02, PNorm = 100.9973, GNorm = 0.4490, lr_0 = 8.3674e-04
Loss = 4.6798e-02, PNorm = 101.0558, GNorm = 0.6678, lr_0 = 8.3616e-04
Loss = 5.0545e-02, PNorm = 101.1111, GNorm = 0.3386, lr_0 = 8.3559e-04
Loss = 4.7359e-02, PNorm = 101.1648, GNorm = 0.3415, lr_0 = 8.3502e-04
Loss = 4.5201e-02, PNorm = 101.2276, GNorm = 0.5137, lr_0 = 8.3445e-04
Loss = 4.7060e-02, PNorm = 101.2838, GNorm = 0.5188, lr_0 = 8.3388e-04
Loss = 4.2596e-02, PNorm = 101.3438, GNorm = 0.3696, lr_0 = 8.3330e-04
Loss = 4.0174e-02, PNorm = 101.3983, GNorm = 0.2307, lr_0 = 8.3273e-04
Loss = 5.0377e-02, PNorm = 101.4550, GNorm = 0.4247, lr_0 = 8.3216e-04
Loss = 4.4591e-02, PNorm = 101.5115, GNorm = 0.2735, lr_0 = 8.3159e-04
Loss = 4.4798e-02, PNorm = 101.5607, GNorm = 0.3109, lr_0 = 8.3102e-04
Loss = 4.6988e-02, PNorm = 101.6114, GNorm = 0.7406, lr_0 = 8.3045e-04
Loss = 4.5728e-02, PNorm = 101.6700, GNorm = 0.4159, lr_0 = 8.2988e-04
Loss = 4.7166e-02, PNorm = 101.7243, GNorm = 0.5726, lr_0 = 8.2932e-04
Loss = 3.9221e-02, PNorm = 101.7813, GNorm = 0.3378, lr_0 = 8.2875e-04
Loss = 4.3542e-02, PNorm = 101.8327, GNorm = 0.2339, lr_0 = 8.2818e-04
Loss = 4.7383e-02, PNorm = 101.8907, GNorm = 0.4130, lr_0 = 8.2761e-04
Loss = 4.9307e-02, PNorm = 101.9451, GNorm = 0.7874, lr_0 = 8.2705e-04
Loss = 4.7544e-02, PNorm = 101.9991, GNorm = 0.3409, lr_0 = 8.2648e-04
Loss = 4.7448e-02, PNorm = 102.0553, GNorm = 0.3640, lr_0 = 8.2591e-04
Loss = 4.4221e-02, PNorm = 102.1139, GNorm = 0.4399, lr_0 = 8.2535e-04
Loss = 4.7171e-02, PNorm = 102.1691, GNorm = 0.6896, lr_0 = 8.2478e-04
Loss = 4.2993e-02, PNorm = 102.2249, GNorm = 0.6503, lr_0 = 8.2422e-04
Loss = 5.0396e-02, PNorm = 102.2827, GNorm = 0.3122, lr_0 = 8.2365e-04
Loss = 4.4255e-02, PNorm = 102.3590, GNorm = 0.6722, lr_0 = 8.2309e-04
Loss = 4.8956e-02, PNorm = 102.4232, GNorm = 0.3360, lr_0 = 8.2252e-04
Loss = 4.9256e-02, PNorm = 102.4910, GNorm = 0.4696, lr_0 = 8.2196e-04
Loss = 4.4014e-02, PNorm = 102.5522, GNorm = 0.9012, lr_0 = 8.2140e-04
Loss = 4.7289e-02, PNorm = 102.6221, GNorm = 0.3227, lr_0 = 8.2084e-04
Loss = 5.0272e-02, PNorm = 102.6923, GNorm = 0.4817, lr_0 = 8.2027e-04
Loss = 4.3214e-02, PNorm = 102.7600, GNorm = 0.4191, lr_0 = 8.1971e-04
Loss = 5.0141e-02, PNorm = 102.8223, GNorm = 0.3122, lr_0 = 8.1915e-04
Loss = 5.4679e-02, PNorm = 102.8833, GNorm = 0.3520, lr_0 = 8.1859e-04
Loss = 4.5374e-02, PNorm = 102.9454, GNorm = 0.4543, lr_0 = 8.1803e-04
Loss = 4.8927e-02, PNorm = 103.0022, GNorm = 0.4271, lr_0 = 8.1747e-04
Loss = 4.1808e-02, PNorm = 103.0692, GNorm = 0.2650, lr_0 = 8.1691e-04
Loss = 4.7639e-02, PNorm = 103.1319, GNorm = 0.3397, lr_0 = 8.1635e-04
Loss = 4.4841e-02, PNorm = 103.1968, GNorm = 0.9536, lr_0 = 8.1579e-04
Loss = 4.5762e-02, PNorm = 103.2639, GNorm = 0.6463, lr_0 = 8.1523e-04
Loss = 4.6371e-02, PNorm = 103.3204, GNorm = 0.2706, lr_0 = 8.1467e-04
Loss = 5.1453e-02, PNorm = 103.3875, GNorm = 0.5948, lr_0 = 8.1411e-04
Loss = 4.0781e-02, PNorm = 103.4628, GNorm = 0.2869, lr_0 = 8.1355e-04
Loss = 4.9479e-02, PNorm = 103.5289, GNorm = 0.3311, lr_0 = 8.1300e-04
Loss = 4.6695e-02, PNorm = 103.6012, GNorm = 0.3030, lr_0 = 8.1244e-04
Loss = 5.4195e-02, PNorm = 103.6750, GNorm = 0.5853, lr_0 = 8.1188e-04
Loss = 4.2110e-02, PNorm = 103.7482, GNorm = 0.3583, lr_0 = 8.1133e-04
Loss = 5.4374e-02, PNorm = 103.8119, GNorm = 0.4599, lr_0 = 8.1077e-04
Loss = 4.7368e-02, PNorm = 103.8821, GNorm = 0.4459, lr_0 = 8.1022e-04
Loss = 5.1898e-02, PNorm = 103.9484, GNorm = 0.4551, lr_0 = 8.0966e-04
Loss = 4.7704e-02, PNorm = 104.0208, GNorm = 0.6042, lr_0 = 8.0911e-04
Loss = 4.1487e-02, PNorm = 104.0898, GNorm = 0.4637, lr_0 = 8.0855e-04
Loss = 5.2426e-02, PNorm = 104.1530, GNorm = 0.6299, lr_0 = 8.0800e-04
Loss = 5.4181e-02, PNorm = 104.2168, GNorm = 0.3492, lr_0 = 8.0745e-04
Loss = 5.8360e-02, PNorm = 104.2955, GNorm = 0.7080, lr_0 = 8.0689e-04
Loss = 4.6406e-02, PNorm = 104.3619, GNorm = 0.4874, lr_0 = 8.0634e-04
Loss = 4.9386e-02, PNorm = 104.4355, GNorm = 0.4536, lr_0 = 8.0579e-04
Loss = 4.6961e-02, PNorm = 104.5043, GNorm = 0.8638, lr_0 = 8.0523e-04
Loss = 5.1240e-02, PNorm = 104.5798, GNorm = 0.5271, lr_0 = 8.0468e-04
Loss = 5.0908e-02, PNorm = 104.6505, GNorm = 0.4674, lr_0 = 8.0413e-04
Loss = 5.6362e-02, PNorm = 104.7148, GNorm = 0.6212, lr_0 = 8.0358e-04
Loss = 5.2071e-02, PNorm = 104.7870, GNorm = 0.7224, lr_0 = 8.0303e-04
Loss = 5.0575e-02, PNorm = 104.8622, GNorm = 0.8641, lr_0 = 8.0248e-04
Loss = 5.0701e-02, PNorm = 104.9365, GNorm = 0.6755, lr_0 = 8.0193e-04
Loss = 5.3794e-02, PNorm = 105.0131, GNorm = 0.6043, lr_0 = 8.0138e-04
Loss = 5.4582e-02, PNorm = 105.0893, GNorm = 0.4732, lr_0 = 8.0083e-04
Loss = 5.3430e-02, PNorm = 105.1696, GNorm = 0.8238, lr_0 = 8.0028e-04
Loss = 5.1175e-02, PNorm = 105.2375, GNorm = 0.8046, lr_0 = 7.9974e-04
Loss = 4.9665e-02, PNorm = 105.3068, GNorm = 0.5379, lr_0 = 7.9919e-04
Loss = 4.8744e-02, PNorm = 105.3784, GNorm = 0.3304, lr_0 = 7.9864e-04
Loss = 5.4199e-02, PNorm = 105.4535, GNorm = 0.3471, lr_0 = 7.9809e-04
Loss = 5.2473e-02, PNorm = 105.5326, GNorm = 0.2520, lr_0 = 7.9755e-04
Loss = 4.6629e-02, PNorm = 105.6089, GNorm = 0.3347, lr_0 = 7.9700e-04
Loss = 5.5144e-02, PNorm = 105.6942, GNorm = 0.3490, lr_0 = 7.9645e-04
Loss = 5.7221e-02, PNorm = 105.7697, GNorm = 0.9401, lr_0 = 7.9591e-04
Loss = 6.0176e-02, PNorm = 105.8538, GNorm = 0.2735, lr_0 = 7.9536e-04
Loss = 4.8099e-02, PNorm = 105.9314, GNorm = 0.2374, lr_0 = 7.9482e-04
Loss = 5.6342e-02, PNorm = 106.0039, GNorm = 1.0446, lr_0 = 7.9427e-04
Loss = 5.5651e-02, PNorm = 106.0825, GNorm = 0.4642, lr_0 = 7.9373e-04
Loss = 4.7709e-02, PNorm = 106.1578, GNorm = 0.3487, lr_0 = 7.9319e-04
Loss = 5.3244e-02, PNorm = 106.2318, GNorm = 0.4396, lr_0 = 7.9264e-04
Loss = 5.0750e-02, PNorm = 106.3081, GNorm = 0.3594, lr_0 = 7.9210e-04
Loss = 5.2570e-02, PNorm = 106.3836, GNorm = 0.3885, lr_0 = 7.9156e-04
Loss = 5.4072e-02, PNorm = 106.4658, GNorm = 0.3912, lr_0 = 7.9101e-04
Loss = 5.8199e-02, PNorm = 106.5525, GNorm = 0.5549, lr_0 = 7.9047e-04
Loss = 4.6498e-02, PNorm = 106.6346, GNorm = 0.2813, lr_0 = 7.8993e-04
Loss = 5.7285e-02, PNorm = 106.7082, GNorm = 0.3848, lr_0 = 7.8939e-04
Loss = 4.9401e-02, PNorm = 106.7795, GNorm = 0.6665, lr_0 = 7.8885e-04
Loss = 5.6794e-02, PNorm = 106.8587, GNorm = 0.6532, lr_0 = 7.8831e-04
Loss = 5.3082e-02, PNorm = 106.9399, GNorm = 0.6910, lr_0 = 7.8777e-04
Loss = 5.4597e-02, PNorm = 107.0202, GNorm = 0.2588, lr_0 = 7.8723e-04
Loss = 5.7351e-02, PNorm = 107.0966, GNorm = 0.4894, lr_0 = 7.8669e-04
Loss = 5.2644e-02, PNorm = 107.1712, GNorm = 0.4336, lr_0 = 7.8615e-04
Loss = 6.1861e-02, PNorm = 107.2554, GNorm = 0.6714, lr_0 = 7.8561e-04
Loss = 5.1905e-02, PNorm = 107.3462, GNorm = 0.3203, lr_0 = 7.8507e-04
Loss = 5.9634e-02, PNorm = 107.4408, GNorm = 0.4950, lr_0 = 7.8454e-04
Loss = 6.2836e-02, PNorm = 107.5274, GNorm = 0.2888, lr_0 = 7.8400e-04
Loss = 4.7906e-02, PNorm = 107.6177, GNorm = 0.7006, lr_0 = 7.8346e-04
Loss = 5.1907e-02, PNorm = 107.6929, GNorm = 0.2705, lr_0 = 7.8293e-04
Loss = 5.1800e-02, PNorm = 107.7690, GNorm = 0.3594, lr_0 = 7.8239e-04
Loss = 5.4579e-02, PNorm = 107.8413, GNorm = 0.9002, lr_0 = 7.8185e-04
Loss = 7.1174e-02, PNorm = 107.9207, GNorm = 0.4104, lr_0 = 7.8132e-04
Validation mae = 0.503570
Epoch 5
Loss = 4.1543e-02, PNorm = 107.9926, GNorm = 0.4481, lr_0 = 7.8078e-04
Loss = 4.4098e-02, PNorm = 108.0622, GNorm = 0.3412, lr_0 = 7.8025e-04
Loss = 4.2318e-02, PNorm = 108.1222, GNorm = 0.2782, lr_0 = 7.7971e-04
Loss = 4.0571e-02, PNorm = 108.1825, GNorm = 0.6516, lr_0 = 7.7918e-04
Loss = 3.7950e-02, PNorm = 108.2284, GNorm = 0.7281, lr_0 = 7.7864e-04
Loss = 4.0369e-02, PNorm = 108.2766, GNorm = 0.4306, lr_0 = 7.7811e-04
Loss = 3.4369e-02, PNorm = 108.3271, GNorm = 0.2694, lr_0 = 7.7758e-04
Loss = 3.4660e-02, PNorm = 108.3775, GNorm = 0.3426, lr_0 = 7.7705e-04
Loss = 2.7582e-02, PNorm = 108.4275, GNorm = 0.2636, lr_0 = 7.7651e-04
Loss = 3.7483e-02, PNorm = 108.4665, GNorm = 0.5267, lr_0 = 7.7598e-04
Loss = 3.6658e-02, PNorm = 108.5200, GNorm = 0.5247, lr_0 = 7.7545e-04
Loss = 3.6040e-02, PNorm = 108.5701, GNorm = 0.4768, lr_0 = 7.7492e-04
Loss = 3.5048e-02, PNorm = 108.6164, GNorm = 0.2258, lr_0 = 7.7439e-04
Loss = 3.7678e-02, PNorm = 108.6631, GNorm = 0.3247, lr_0 = 7.7386e-04
Loss = 3.1334e-02, PNorm = 108.7155, GNorm = 0.3414, lr_0 = 7.7333e-04
Loss = 2.9535e-02, PNorm = 108.7647, GNorm = 0.5691, lr_0 = 7.7280e-04
Loss = 3.2111e-02, PNorm = 108.8188, GNorm = 0.2701, lr_0 = 7.7227e-04
Loss = 3.4805e-02, PNorm = 108.8674, GNorm = 0.2311, lr_0 = 7.7174e-04
Loss = 3.6143e-02, PNorm = 108.9235, GNorm = 0.2938, lr_0 = 7.7121e-04
Loss = 3.2252e-02, PNorm = 108.9676, GNorm = 0.3324, lr_0 = 7.7068e-04
Loss = 3.1360e-02, PNorm = 109.0214, GNorm = 0.3319, lr_0 = 7.7015e-04
Loss = 3.5394e-02, PNorm = 109.0660, GNorm = 0.5538, lr_0 = 7.6963e-04
Loss = 3.2452e-02, PNorm = 109.1108, GNorm = 0.6074, lr_0 = 7.6910e-04
Loss = 3.5202e-02, PNorm = 109.1622, GNorm = 0.5142, lr_0 = 7.6857e-04
Loss = 3.5753e-02, PNorm = 109.2120, GNorm = 0.4677, lr_0 = 7.6805e-04
Loss = 2.8120e-02, PNorm = 109.2658, GNorm = 0.3120, lr_0 = 7.6752e-04
Loss = 3.2035e-02, PNorm = 109.3144, GNorm = 0.4475, lr_0 = 7.6699e-04
Loss = 3.6318e-02, PNorm = 109.3555, GNorm = 0.3546, lr_0 = 7.6647e-04
Loss = 3.5843e-02, PNorm = 109.4027, GNorm = 0.6495, lr_0 = 7.6594e-04
Loss = 2.9888e-02, PNorm = 109.4543, GNorm = 0.3004, lr_0 = 7.6542e-04
Loss = 3.1308e-02, PNorm = 109.5021, GNorm = 0.2805, lr_0 = 7.6489e-04
Loss = 3.5978e-02, PNorm = 109.5468, GNorm = 0.3140, lr_0 = 7.6437e-04
Loss = 3.6968e-02, PNorm = 109.6047, GNorm = 0.3846, lr_0 = 7.6385e-04
Loss = 3.2713e-02, PNorm = 109.6660, GNorm = 0.3210, lr_0 = 7.6332e-04
Loss = 3.4143e-02, PNorm = 109.7328, GNorm = 0.7135, lr_0 = 7.6280e-04
Loss = 2.8959e-02, PNorm = 109.7860, GNorm = 0.3467, lr_0 = 7.6228e-04
Loss = 3.0154e-02, PNorm = 109.8359, GNorm = 0.4423, lr_0 = 7.6176e-04
Loss = 3.7850e-02, PNorm = 109.8823, GNorm = 0.3614, lr_0 = 7.6123e-04
Loss = 3.2808e-02, PNorm = 109.9320, GNorm = 0.5074, lr_0 = 7.6071e-04
Loss = 3.6643e-02, PNorm = 109.9862, GNorm = 0.3500, lr_0 = 7.6019e-04
Loss = 3.5130e-02, PNorm = 110.0414, GNorm = 0.2868, lr_0 = 7.5967e-04
Loss = 3.9584e-02, PNorm = 110.0924, GNorm = 0.2088, lr_0 = 7.5915e-04
Loss = 3.3840e-02, PNorm = 110.1420, GNorm = 0.1810, lr_0 = 7.5863e-04
Loss = 3.6921e-02, PNorm = 110.1958, GNorm = 0.5787, lr_0 = 7.5811e-04
Loss = 3.3725e-02, PNorm = 110.2507, GNorm = 0.4165, lr_0 = 7.5759e-04
Loss = 3.5483e-02, PNorm = 110.3105, GNorm = 0.2824, lr_0 = 7.5707e-04
Loss = 3.6953e-02, PNorm = 110.3724, GNorm = 0.2635, lr_0 = 7.5655e-04
Loss = 3.3152e-02, PNorm = 110.4259, GNorm = 0.2370, lr_0 = 7.5603e-04
Loss = 3.1177e-02, PNorm = 110.4826, GNorm = 0.3665, lr_0 = 7.5552e-04
Loss = 4.2611e-02, PNorm = 110.5389, GNorm = 0.9823, lr_0 = 7.5500e-04
Loss = 3.8831e-02, PNorm = 110.6005, GNorm = 0.2961, lr_0 = 7.5448e-04
Loss = 3.5343e-02, PNorm = 110.6634, GNorm = 0.5701, lr_0 = 7.5397e-04
Loss = 3.2213e-02, PNorm = 110.7186, GNorm = 0.5661, lr_0 = 7.5345e-04
Loss = 3.2464e-02, PNorm = 110.7805, GNorm = 0.2433, lr_0 = 7.5293e-04
Loss = 3.5885e-02, PNorm = 110.8394, GNorm = 0.4498, lr_0 = 7.5242e-04
Loss = 3.4360e-02, PNorm = 110.8925, GNorm = 0.2219, lr_0 = 7.5190e-04
Loss = 4.1379e-02, PNorm = 110.9523, GNorm = 0.4162, lr_0 = 7.5139e-04
Loss = 3.7252e-02, PNorm = 111.0089, GNorm = 0.6492, lr_0 = 7.5087e-04
Loss = 3.8147e-02, PNorm = 111.0674, GNorm = 0.6108, lr_0 = 7.5036e-04
Loss = 3.2498e-02, PNorm = 111.1215, GNorm = 0.5702, lr_0 = 7.4984e-04
Loss = 3.5810e-02, PNorm = 111.1841, GNorm = 0.4373, lr_0 = 7.4933e-04
Loss = 3.8106e-02, PNorm = 111.2429, GNorm = 0.4849, lr_0 = 7.4882e-04
Loss = 4.0376e-02, PNorm = 111.2963, GNorm = 0.4602, lr_0 = 7.4830e-04
Loss = 3.1233e-02, PNorm = 111.3609, GNorm = 0.2580, lr_0 = 7.4779e-04
Loss = 4.0008e-02, PNorm = 111.4225, GNorm = 0.3179, lr_0 = 7.4728e-04
Loss = 3.4486e-02, PNorm = 111.4883, GNorm = 0.2463, lr_0 = 7.4677e-04
Loss = 3.6150e-02, PNorm = 111.5481, GNorm = 0.3522, lr_0 = 7.4625e-04
Loss = 3.5594e-02, PNorm = 111.6099, GNorm = 0.2979, lr_0 = 7.4574e-04
Loss = 3.7373e-02, PNorm = 111.6692, GNorm = 0.8810, lr_0 = 7.4523e-04
Loss = 3.4397e-02, PNorm = 111.7229, GNorm = 0.1697, lr_0 = 7.4472e-04
Loss = 4.0086e-02, PNorm = 111.7872, GNorm = 1.0313, lr_0 = 7.4421e-04
Loss = 3.6253e-02, PNorm = 111.8533, GNorm = 0.1993, lr_0 = 7.4370e-04
Loss = 3.4818e-02, PNorm = 111.9171, GNorm = 0.5042, lr_0 = 7.4319e-04
Loss = 3.7788e-02, PNorm = 111.9719, GNorm = 0.3873, lr_0 = 7.4268e-04
Loss = 4.0655e-02, PNorm = 112.0375, GNorm = 0.3847, lr_0 = 7.4217e-04
Loss = 4.1225e-02, PNorm = 112.1045, GNorm = 0.3206, lr_0 = 7.4167e-04
Loss = 3.5600e-02, PNorm = 112.1733, GNorm = 0.7783, lr_0 = 7.4116e-04
Loss = 3.7575e-02, PNorm = 112.2419, GNorm = 0.6375, lr_0 = 7.4065e-04
Loss = 3.7038e-02, PNorm = 112.3010, GNorm = 0.5195, lr_0 = 7.4014e-04
Loss = 3.0075e-02, PNorm = 112.3692, GNorm = 0.6608, lr_0 = 7.3964e-04
Loss = 4.1311e-02, PNorm = 112.4259, GNorm = 0.1933, lr_0 = 7.3913e-04
Loss = 3.6542e-02, PNorm = 112.4943, GNorm = 0.3500, lr_0 = 7.3862e-04
Loss = 3.5948e-02, PNorm = 112.5529, GNorm = 0.5131, lr_0 = 7.3812e-04
Loss = 3.8829e-02, PNorm = 112.6157, GNorm = 0.2679, lr_0 = 7.3761e-04
Loss = 3.8065e-02, PNorm = 112.6834, GNorm = 0.5293, lr_0 = 7.3711e-04
Loss = 3.7584e-02, PNorm = 112.7456, GNorm = 0.4697, lr_0 = 7.3660e-04
Loss = 4.3610e-02, PNorm = 112.8049, GNorm = 0.2632, lr_0 = 7.3610e-04
Loss = 3.8084e-02, PNorm = 112.8665, GNorm = 0.4187, lr_0 = 7.3559e-04
Loss = 3.8704e-02, PNorm = 112.9368, GNorm = 0.3922, lr_0 = 7.3509e-04
Loss = 4.2651e-02, PNorm = 113.0050, GNorm = 0.7460, lr_0 = 7.3458e-04
Loss = 3.9807e-02, PNorm = 113.0766, GNorm = 0.4588, lr_0 = 7.3408e-04
Loss = 4.5289e-02, PNorm = 113.1499, GNorm = 1.1693, lr_0 = 7.3358e-04
Loss = 3.8626e-02, PNorm = 113.2209, GNorm = 0.4998, lr_0 = 7.3308e-04
Loss = 3.5769e-02, PNorm = 113.2937, GNorm = 0.6222, lr_0 = 7.3257e-04
Loss = 3.7258e-02, PNorm = 113.3638, GNorm = 0.6158, lr_0 = 7.3207e-04
Loss = 3.8446e-02, PNorm = 113.4251, GNorm = 0.7991, lr_0 = 7.3157e-04
Loss = 3.9678e-02, PNorm = 113.4930, GNorm = 0.2843, lr_0 = 7.3107e-04
Loss = 3.8825e-02, PNorm = 113.5572, GNorm = 0.2300, lr_0 = 7.3057e-04
Loss = 4.1168e-02, PNorm = 113.6310, GNorm = 0.4873, lr_0 = 7.3007e-04
Loss = 4.3789e-02, PNorm = 113.7049, GNorm = 0.3469, lr_0 = 7.2957e-04
Loss = 3.7265e-02, PNorm = 113.7721, GNorm = 0.4454, lr_0 = 7.2907e-04
Loss = 3.7666e-02, PNorm = 113.8409, GNorm = 0.2344, lr_0 = 7.2857e-04
Loss = 3.8705e-02, PNorm = 113.9065, GNorm = 0.2477, lr_0 = 7.2807e-04
Loss = 3.7462e-02, PNorm = 113.9762, GNorm = 0.4428, lr_0 = 7.2757e-04
Loss = 4.3420e-02, PNorm = 114.0424, GNorm = 0.4293, lr_0 = 7.2707e-04
Loss = 3.8928e-02, PNorm = 114.1158, GNorm = 1.0037, lr_0 = 7.2657e-04
Loss = 4.3252e-02, PNorm = 114.1812, GNorm = 0.2676, lr_0 = 7.2608e-04
Loss = 3.6314e-02, PNorm = 114.2456, GNorm = 0.5296, lr_0 = 7.2558e-04
Loss = 3.9629e-02, PNorm = 114.3098, GNorm = 0.3611, lr_0 = 7.2508e-04
Loss = 4.3549e-02, PNorm = 114.3773, GNorm = 0.8461, lr_0 = 7.2458e-04
Loss = 4.6975e-02, PNorm = 114.4509, GNorm = 0.2548, lr_0 = 7.2409e-04
Loss = 4.5260e-02, PNorm = 114.5229, GNorm = 0.5221, lr_0 = 7.2359e-04
Loss = 4.5203e-02, PNorm = 114.6009, GNorm = 0.3348, lr_0 = 7.2310e-04
Loss = 4.5286e-02, PNorm = 114.6799, GNorm = 0.6425, lr_0 = 7.2260e-04
Loss = 3.9281e-02, PNorm = 114.7675, GNorm = 0.4992, lr_0 = 7.2211e-04
Loss = 3.7537e-02, PNorm = 114.8388, GNorm = 0.6548, lr_0 = 7.2161e-04
Loss = 3.6024e-02, PNorm = 114.9111, GNorm = 0.4106, lr_0 = 7.2112e-04
Loss = 3.8429e-02, PNorm = 114.9834, GNorm = 0.3324, lr_0 = 7.2062e-04
Loss = 3.8394e-02, PNorm = 115.0506, GNorm = 0.8477, lr_0 = 7.2013e-04
Loss = 4.1463e-02, PNorm = 115.1155, GNorm = 0.5645, lr_0 = 7.1964e-04
Validation mae = 0.492418
Epoch 6
Loss = 3.3483e-02, PNorm = 115.1700, GNorm = 0.4005, lr_0 = 7.1914e-04
Loss = 2.6486e-02, PNorm = 115.2205, GNorm = 0.2388, lr_0 = 7.1865e-04
Loss = 3.2535e-02, PNorm = 115.2650, GNorm = 0.2440, lr_0 = 7.1816e-04
Loss = 2.8723e-02, PNorm = 115.3081, GNorm = 0.1809, lr_0 = 7.1767e-04
Loss = 3.3090e-02, PNorm = 115.3503, GNorm = 0.6831, lr_0 = 7.1717e-04
Loss = 3.1333e-02, PNorm = 115.3968, GNorm = 0.2721, lr_0 = 7.1668e-04
Loss = 2.9808e-02, PNorm = 115.4407, GNorm = 0.2412, lr_0 = 7.1619e-04
Loss = 3.0729e-02, PNorm = 115.4882, GNorm = 0.2138, lr_0 = 7.1570e-04
Loss = 2.4956e-02, PNorm = 115.5260, GNorm = 0.2237, lr_0 = 7.1521e-04
Loss = 2.5701e-02, PNorm = 115.5608, GNorm = 0.1803, lr_0 = 7.1472e-04
Loss = 2.8520e-02, PNorm = 115.5983, GNorm = 0.4063, lr_0 = 7.1423e-04
Loss = 3.0498e-02, PNorm = 115.6378, GNorm = 0.6320, lr_0 = 7.1374e-04
Loss = 3.2066e-02, PNorm = 115.6791, GNorm = 0.2878, lr_0 = 7.1325e-04
Loss = 2.9146e-02, PNorm = 115.7274, GNorm = 0.3472, lr_0 = 7.1277e-04
Loss = 2.6038e-02, PNorm = 115.7735, GNorm = 0.3272, lr_0 = 7.1228e-04
Loss = 3.0555e-02, PNorm = 115.8218, GNorm = 0.4249, lr_0 = 7.1179e-04
Loss = 2.6062e-02, PNorm = 115.8781, GNorm = 0.2588, lr_0 = 7.1130e-04
Loss = 2.9703e-02, PNorm = 115.9228, GNorm = 0.4092, lr_0 = 7.1081e-04
Loss = 2.9849e-02, PNorm = 115.9736, GNorm = 0.5389, lr_0 = 7.1033e-04
Loss = 3.5435e-02, PNorm = 116.0192, GNorm = 0.2166, lr_0 = 7.0984e-04
Loss = 3.1546e-02, PNorm = 116.0701, GNorm = 0.4783, lr_0 = 7.0935e-04
Loss = 2.6359e-02, PNorm = 116.1238, GNorm = 0.3217, lr_0 = 7.0887e-04
Loss = 3.2056e-02, PNorm = 116.1698, GNorm = 0.5341, lr_0 = 7.0838e-04
Loss = 2.5054e-02, PNorm = 116.2169, GNorm = 0.5812, lr_0 = 7.0790e-04
Loss = 2.6393e-02, PNorm = 116.2609, GNorm = 0.4805, lr_0 = 7.0741e-04
Loss = 2.9331e-02, PNorm = 116.3072, GNorm = 0.2027, lr_0 = 7.0693e-04
Loss = 3.1488e-02, PNorm = 116.3458, GNorm = 0.3237, lr_0 = 7.0644e-04
Loss = 2.8822e-02, PNorm = 116.3920, GNorm = 0.3769, lr_0 = 7.0596e-04
Loss = 2.7680e-02, PNorm = 116.4376, GNorm = 0.3838, lr_0 = 7.0548e-04
Loss = 2.9219e-02, PNorm = 116.4883, GNorm = 0.3931, lr_0 = 7.0499e-04
Loss = 2.4920e-02, PNorm = 116.5325, GNorm = 0.5641, lr_0 = 7.0451e-04
Loss = 2.4942e-02, PNorm = 116.5796, GNorm = 0.2760, lr_0 = 7.0403e-04
Loss = 2.9693e-02, PNorm = 116.6279, GNorm = 0.3349, lr_0 = 7.0354e-04
Loss = 2.6319e-02, PNorm = 116.6702, GNorm = 0.3941, lr_0 = 7.0306e-04
Loss = 2.5112e-02, PNorm = 116.7162, GNorm = 0.2455, lr_0 = 7.0258e-04
Loss = 2.9977e-02, PNorm = 116.7639, GNorm = 0.2962, lr_0 = 7.0210e-04
Loss = 2.7084e-02, PNorm = 116.8142, GNorm = 0.2435, lr_0 = 7.0162e-04
Loss = 2.4833e-02, PNorm = 116.8596, GNorm = 0.4769, lr_0 = 7.0114e-04
Loss = 2.7864e-02, PNorm = 116.9055, GNorm = 0.2427, lr_0 = 7.0066e-04
Loss = 2.7689e-02, PNorm = 116.9564, GNorm = 0.2980, lr_0 = 7.0018e-04
Loss = 2.6011e-02, PNorm = 117.0083, GNorm = 0.5195, lr_0 = 6.9970e-04
Loss = 2.3002e-02, PNorm = 117.0592, GNorm = 0.2355, lr_0 = 6.9922e-04
Loss = 2.5719e-02, PNorm = 117.0988, GNorm = 0.5119, lr_0 = 6.9874e-04
Loss = 2.4924e-02, PNorm = 117.1471, GNorm = 0.4311, lr_0 = 6.9826e-04
Loss = 2.6753e-02, PNorm = 117.1934, GNorm = 0.8149, lr_0 = 6.9778e-04
Loss = 2.9364e-02, PNorm = 117.2497, GNorm = 0.7761, lr_0 = 6.9730e-04
Loss = 2.7923e-02, PNorm = 117.2978, GNorm = 0.2904, lr_0 = 6.9683e-04
Loss = 2.6984e-02, PNorm = 117.3470, GNorm = 0.1796, lr_0 = 6.9635e-04
Loss = 2.5878e-02, PNorm = 117.3909, GNorm = 0.4232, lr_0 = 6.9587e-04
Loss = 2.7671e-02, PNorm = 117.4402, GNorm = 0.4068, lr_0 = 6.9540e-04
Loss = 2.9452e-02, PNorm = 117.4879, GNorm = 0.4570, lr_0 = 6.9492e-04
Loss = 2.7790e-02, PNorm = 117.5357, GNorm = 0.7256, lr_0 = 6.9444e-04
Loss = 3.0426e-02, PNorm = 117.5796, GNorm = 0.2930, lr_0 = 6.9397e-04
Loss = 2.9502e-02, PNorm = 117.6320, GNorm = 0.2555, lr_0 = 6.9349e-04
Loss = 2.9139e-02, PNorm = 117.6779, GNorm = 0.2850, lr_0 = 6.9302e-04
Loss = 2.6985e-02, PNorm = 117.7314, GNorm = 0.4454, lr_0 = 6.9254e-04
Loss = 3.4322e-02, PNorm = 117.7920, GNorm = 0.2261, lr_0 = 6.9207e-04
Loss = 2.6667e-02, PNorm = 117.8430, GNorm = 0.2301, lr_0 = 6.9159e-04
Loss = 2.4961e-02, PNorm = 117.8898, GNorm = 0.5179, lr_0 = 6.9112e-04
Loss = 2.5476e-02, PNorm = 117.9336, GNorm = 0.4192, lr_0 = 6.9065e-04
Loss = 2.4847e-02, PNorm = 117.9843, GNorm = 0.2169, lr_0 = 6.9017e-04
Loss = 2.5513e-02, PNorm = 118.0323, GNorm = 0.3127, lr_0 = 6.8970e-04
Loss = 2.4602e-02, PNorm = 118.0846, GNorm = 0.2400, lr_0 = 6.8923e-04
Loss = 2.6308e-02, PNorm = 118.1250, GNorm = 0.4998, lr_0 = 6.8876e-04
Loss = 2.4914e-02, PNorm = 118.1702, GNorm = 0.2943, lr_0 = 6.8828e-04
Loss = 3.2587e-02, PNorm = 118.2238, GNorm = 0.4593, lr_0 = 6.8781e-04
Loss = 3.0721e-02, PNorm = 118.2892, GNorm = 0.4497, lr_0 = 6.8734e-04
Loss = 3.1684e-02, PNorm = 118.3440, GNorm = 0.3439, lr_0 = 6.8687e-04
Loss = 3.0714e-02, PNorm = 118.3934, GNorm = 0.2096, lr_0 = 6.8640e-04
Loss = 2.7240e-02, PNorm = 118.4527, GNorm = 0.2363, lr_0 = 6.8593e-04
Loss = 2.8854e-02, PNorm = 118.5022, GNorm = 0.2830, lr_0 = 6.8546e-04
Loss = 3.3176e-02, PNorm = 118.5608, GNorm = 0.2686, lr_0 = 6.8499e-04
Loss = 3.0565e-02, PNorm = 118.6173, GNorm = 0.4394, lr_0 = 6.8452e-04
Loss = 3.2467e-02, PNorm = 118.6741, GNorm = 0.3918, lr_0 = 6.8405e-04
Loss = 2.8749e-02, PNorm = 118.7408, GNorm = 0.9291, lr_0 = 6.8358e-04
Loss = 2.6459e-02, PNorm = 118.8014, GNorm = 0.3232, lr_0 = 6.8312e-04
Loss = 2.9066e-02, PNorm = 118.8545, GNorm = 0.2513, lr_0 = 6.8265e-04
Loss = 2.8942e-02, PNorm = 118.9009, GNorm = 0.5718, lr_0 = 6.8218e-04
Loss = 2.4920e-02, PNorm = 118.9533, GNorm = 0.4093, lr_0 = 6.8171e-04
Loss = 2.6277e-02, PNorm = 119.0048, GNorm = 0.3076, lr_0 = 6.8125e-04
Loss = 3.3219e-02, PNorm = 119.0593, GNorm = 0.4466, lr_0 = 6.8078e-04
Loss = 2.4274e-02, PNorm = 119.1200, GNorm = 0.4028, lr_0 = 6.8031e-04
Loss = 2.4496e-02, PNorm = 119.1734, GNorm = 0.2497, lr_0 = 6.7985e-04
Loss = 2.9824e-02, PNorm = 119.2271, GNorm = 0.5286, lr_0 = 6.7938e-04
Loss = 2.7685e-02, PNorm = 119.2879, GNorm = 0.6180, lr_0 = 6.7892e-04
Loss = 3.2089e-02, PNorm = 119.3481, GNorm = 0.3558, lr_0 = 6.7845e-04
Loss = 2.8274e-02, PNorm = 119.4086, GNorm = 0.3688, lr_0 = 6.7799e-04
Loss = 3.0578e-02, PNorm = 119.4682, GNorm = 0.5159, lr_0 = 6.7752e-04
Loss = 3.0932e-02, PNorm = 119.5218, GNorm = 0.3361, lr_0 = 6.7706e-04
Loss = 3.0404e-02, PNorm = 119.5746, GNorm = 0.4675, lr_0 = 6.7659e-04
Loss = 3.2815e-02, PNorm = 119.6382, GNorm = 0.6671, lr_0 = 6.7613e-04
Loss = 2.6948e-02, PNorm = 119.7001, GNorm = 0.2507, lr_0 = 6.7567e-04
Loss = 3.2384e-02, PNorm = 119.7631, GNorm = 0.2904, lr_0 = 6.7520e-04
Loss = 2.8295e-02, PNorm = 119.8310, GNorm = 0.5679, lr_0 = 6.7474e-04
Loss = 3.0102e-02, PNorm = 119.8869, GNorm = 0.5919, lr_0 = 6.7428e-04
Loss = 3.6192e-02, PNorm = 119.9505, GNorm = 0.7300, lr_0 = 6.7382e-04
Loss = 2.8736e-02, PNorm = 120.0151, GNorm = 0.1755, lr_0 = 6.7335e-04
Loss = 3.0232e-02, PNorm = 120.0709, GNorm = 0.3988, lr_0 = 6.7289e-04
Loss = 3.1428e-02, PNorm = 120.1250, GNorm = 0.5970, lr_0 = 6.7243e-04
Loss = 3.0864e-02, PNorm = 120.1836, GNorm = 0.3139, lr_0 = 6.7197e-04
Loss = 2.4212e-02, PNorm = 120.2409, GNorm = 0.1762, lr_0 = 6.7151e-04
Loss = 3.2916e-02, PNorm = 120.2944, GNorm = 0.3374, lr_0 = 6.7105e-04
Loss = 2.9605e-02, PNorm = 120.3484, GNorm = 0.2572, lr_0 = 6.7059e-04
Loss = 3.3662e-02, PNorm = 120.4020, GNorm = 0.6758, lr_0 = 6.7013e-04
Loss = 3.1150e-02, PNorm = 120.4602, GNorm = 0.2883, lr_0 = 6.6967e-04
Loss = 2.6758e-02, PNorm = 120.5286, GNorm = 0.2346, lr_0 = 6.6921e-04
Loss = 3.4295e-02, PNorm = 120.5907, GNorm = 0.2841, lr_0 = 6.6876e-04
Loss = 2.8656e-02, PNorm = 120.6519, GNorm = 0.3066, lr_0 = 6.6830e-04
Loss = 3.6634e-02, PNorm = 120.7218, GNorm = 0.4372, lr_0 = 6.6784e-04
Loss = 3.0818e-02, PNorm = 120.7935, GNorm = 0.3084, lr_0 = 6.6738e-04
Loss = 3.2780e-02, PNorm = 120.8621, GNorm = 0.2530, lr_0 = 6.6693e-04
Loss = 3.7853e-02, PNorm = 120.9296, GNorm = 0.3683, lr_0 = 6.6647e-04
Loss = 3.7093e-02, PNorm = 120.9949, GNorm = 0.3127, lr_0 = 6.6601e-04
Loss = 3.0727e-02, PNorm = 121.0525, GNorm = 0.3220, lr_0 = 6.6556e-04
Loss = 2.8683e-02, PNorm = 121.1121, GNorm = 0.3005, lr_0 = 6.6510e-04
Loss = 3.4547e-02, PNorm = 121.1731, GNorm = 0.2831, lr_0 = 6.6464e-04
Loss = 3.0732e-02, PNorm = 121.2347, GNorm = 0.5847, lr_0 = 6.6419e-04
Loss = 3.4701e-02, PNorm = 121.2989, GNorm = 0.6711, lr_0 = 6.6373e-04
Loss = 2.4882e-02, PNorm = 121.3599, GNorm = 0.2139, lr_0 = 6.6328e-04
Loss = 3.0484e-02, PNorm = 121.4162, GNorm = 0.1891, lr_0 = 6.6282e-04
Validation mae = 0.489496
Epoch 7
Loss = 2.3333e-02, PNorm = 121.4674, GNorm = 0.2165, lr_0 = 6.6237e-04
Loss = 2.3711e-02, PNorm = 121.5184, GNorm = 0.3139, lr_0 = 6.6192e-04
Loss = 2.4611e-02, PNorm = 121.5594, GNorm = 0.1661, lr_0 = 6.6146e-04
Loss = 2.8940e-02, PNorm = 121.5944, GNorm = 0.5020, lr_0 = 6.6101e-04
Loss = 2.7324e-02, PNorm = 121.6330, GNorm = 0.6028, lr_0 = 6.6056e-04
Loss = 2.6943e-02, PNorm = 121.6815, GNorm = 0.2361, lr_0 = 6.6011e-04
Loss = 2.3565e-02, PNorm = 121.7289, GNorm = 0.3151, lr_0 = 6.5965e-04
Loss = 2.5373e-02, PNorm = 121.7748, GNorm = 0.2606, lr_0 = 6.5920e-04
Loss = 2.6540e-02, PNorm = 121.8207, GNorm = 0.2471, lr_0 = 6.5875e-04
Loss = 2.0030e-02, PNorm = 121.8623, GNorm = 0.1801, lr_0 = 6.5830e-04
Loss = 2.2874e-02, PNorm = 121.8942, GNorm = 0.2409, lr_0 = 6.5785e-04
Loss = 2.5020e-02, PNorm = 121.9304, GNorm = 0.2168, lr_0 = 6.5740e-04
Loss = 2.5562e-02, PNorm = 121.9648, GNorm = 0.3286, lr_0 = 6.5695e-04
Loss = 2.0470e-02, PNorm = 122.0121, GNorm = 0.3116, lr_0 = 6.5650e-04
Loss = 2.1116e-02, PNorm = 122.0497, GNorm = 0.4536, lr_0 = 6.5605e-04
Loss = 2.7271e-02, PNorm = 122.0953, GNorm = 0.1673, lr_0 = 6.5560e-04
Loss = 2.2100e-02, PNorm = 122.1342, GNorm = 0.2757, lr_0 = 6.5515e-04
Loss = 2.1904e-02, PNorm = 122.1698, GNorm = 0.4976, lr_0 = 6.5470e-04
Loss = 2.2977e-02, PNorm = 122.2087, GNorm = 0.2187, lr_0 = 6.5425e-04
Loss = 2.0689e-02, PNorm = 122.2461, GNorm = 0.4727, lr_0 = 6.5380e-04
Loss = 1.9899e-02, PNorm = 122.2850, GNorm = 0.2823, lr_0 = 6.5335e-04
Loss = 2.1659e-02, PNorm = 122.3183, GNorm = 0.3878, lr_0 = 6.5291e-04
Loss = 2.7088e-02, PNorm = 122.3552, GNorm = 0.4374, lr_0 = 6.5246e-04
Loss = 2.4030e-02, PNorm = 122.3966, GNorm = 0.9231, lr_0 = 6.5201e-04
Loss = 2.4850e-02, PNorm = 122.4399, GNorm = 0.2290, lr_0 = 6.5157e-04
Loss = 2.1293e-02, PNorm = 122.4877, GNorm = 0.2390, lr_0 = 6.5112e-04
Loss = 2.3116e-02, PNorm = 122.5274, GNorm = 0.4740, lr_0 = 6.5067e-04
Loss = 2.2332e-02, PNorm = 122.5678, GNorm = 0.1571, lr_0 = 6.5023e-04
Loss = 2.6550e-02, PNorm = 122.6073, GNorm = 0.5197, lr_0 = 6.4978e-04
Loss = 2.7517e-02, PNorm = 122.6491, GNorm = 0.3208, lr_0 = 6.4934e-04
Loss = 1.8952e-02, PNorm = 122.6903, GNorm = 0.1599, lr_0 = 6.4889e-04
Loss = 2.2640e-02, PNorm = 122.7329, GNorm = 0.2869, lr_0 = 6.4845e-04
Loss = 2.0193e-02, PNorm = 122.7815, GNorm = 0.3058, lr_0 = 6.4800e-04
Loss = 2.0494e-02, PNorm = 122.8215, GNorm = 0.3259, lr_0 = 6.4756e-04
Loss = 2.1379e-02, PNorm = 122.8604, GNorm = 0.4157, lr_0 = 6.4712e-04
Loss = 2.2576e-02, PNorm = 122.9029, GNorm = 0.4426, lr_0 = 6.4667e-04
Loss = 2.2215e-02, PNorm = 122.9379, GNorm = 0.7239, lr_0 = 6.4623e-04
Loss = 2.1900e-02, PNorm = 122.9745, GNorm = 0.4526, lr_0 = 6.4579e-04
Loss = 2.1785e-02, PNorm = 123.0116, GNorm = 0.7072, lr_0 = 6.4534e-04
Loss = 2.1595e-02, PNorm = 123.0498, GNorm = 0.1364, lr_0 = 6.4490e-04
Loss = 1.9620e-02, PNorm = 123.0868, GNorm = 0.2880, lr_0 = 6.4446e-04
Loss = 2.1421e-02, PNorm = 123.1278, GNorm = 0.1926, lr_0 = 6.4402e-04
Loss = 2.1495e-02, PNorm = 123.1744, GNorm = 0.3539, lr_0 = 6.4358e-04
Loss = 2.3515e-02, PNorm = 123.2151, GNorm = 0.3198, lr_0 = 6.4314e-04
Loss = 2.1817e-02, PNorm = 123.2661, GNorm = 0.1606, lr_0 = 6.4270e-04
Loss = 2.3736e-02, PNorm = 123.3097, GNorm = 0.3795, lr_0 = 6.4226e-04
Loss = 2.3723e-02, PNorm = 123.3504, GNorm = 0.3494, lr_0 = 6.4182e-04
Loss = 2.3253e-02, PNorm = 123.3907, GNorm = 0.1846, lr_0 = 6.4138e-04
Loss = 1.9574e-02, PNorm = 123.4361, GNorm = 0.3127, lr_0 = 6.4094e-04
Loss = 2.0669e-02, PNorm = 123.4818, GNorm = 0.2347, lr_0 = 6.4050e-04
Loss = 2.3938e-02, PNorm = 123.5236, GNorm = 0.3977, lr_0 = 6.4006e-04
Loss = 1.8851e-02, PNorm = 123.5627, GNorm = 0.1490, lr_0 = 6.3962e-04
Loss = 2.1261e-02, PNorm = 123.6011, GNorm = 0.5588, lr_0 = 6.3918e-04
Loss = 1.9549e-02, PNorm = 123.6409, GNorm = 0.5114, lr_0 = 6.3874e-04
Loss = 2.0410e-02, PNorm = 123.6757, GNorm = 0.3376, lr_0 = 6.3831e-04
Loss = 2.4022e-02, PNorm = 123.7151, GNorm = 0.2628, lr_0 = 6.3787e-04
Loss = 1.9393e-02, PNorm = 123.7624, GNorm = 0.1978, lr_0 = 6.3743e-04
Loss = 1.9797e-02, PNorm = 123.7984, GNorm = 0.1785, lr_0 = 6.3700e-04
Loss = 2.1609e-02, PNorm = 123.8315, GNorm = 0.5734, lr_0 = 6.3656e-04
Loss = 2.6120e-02, PNorm = 123.8737, GNorm = 0.3956, lr_0 = 6.3612e-04
Loss = 2.3255e-02, PNorm = 123.9235, GNorm = 0.3962, lr_0 = 6.3569e-04
Loss = 2.0116e-02, PNorm = 123.9705, GNorm = 0.2521, lr_0 = 6.3525e-04
Loss = 2.3200e-02, PNorm = 124.0157, GNorm = 0.3614, lr_0 = 6.3482e-04
Loss = 2.7833e-02, PNorm = 124.0584, GNorm = 0.4997, lr_0 = 6.3438e-04
Loss = 2.0494e-02, PNorm = 124.1099, GNorm = 0.2283, lr_0 = 6.3395e-04
Loss = 2.6770e-02, PNorm = 124.1634, GNorm = 0.4658, lr_0 = 6.3351e-04
Loss = 2.2044e-02, PNorm = 124.2150, GNorm = 0.3654, lr_0 = 6.3308e-04
Loss = 2.1323e-02, PNorm = 124.2607, GNorm = 0.4960, lr_0 = 6.3265e-04
Loss = 2.5480e-02, PNorm = 124.3055, GNorm = 0.3571, lr_0 = 6.3221e-04
Loss = 2.5263e-02, PNorm = 124.3548, GNorm = 0.2735, lr_0 = 6.3178e-04
Loss = 2.0081e-02, PNorm = 124.4052, GNorm = 0.2953, lr_0 = 6.3135e-04
Loss = 2.4391e-02, PNorm = 124.4530, GNorm = 0.4496, lr_0 = 6.3091e-04
Loss = 2.5577e-02, PNorm = 124.4968, GNorm = 0.3092, lr_0 = 6.3048e-04
Loss = 2.4489e-02, PNorm = 124.5514, GNorm = 0.3422, lr_0 = 6.3005e-04
Loss = 2.7422e-02, PNorm = 124.5953, GNorm = 0.8629, lr_0 = 6.2962e-04
Loss = 2.2192e-02, PNorm = 124.6376, GNorm = 0.6957, lr_0 = 6.2919e-04
Loss = 2.2086e-02, PNorm = 124.6855, GNorm = 0.3540, lr_0 = 6.2876e-04
Loss = 2.2402e-02, PNorm = 124.7400, GNorm = 0.2424, lr_0 = 6.2833e-04
Loss = 2.1041e-02, PNorm = 124.7869, GNorm = 0.2107, lr_0 = 6.2789e-04
Loss = 2.4605e-02, PNorm = 124.8299, GNorm = 0.3117, lr_0 = 6.2746e-04
Loss = 2.3247e-02, PNorm = 124.8764, GNorm = 0.2841, lr_0 = 6.2703e-04
Loss = 2.3119e-02, PNorm = 124.9287, GNorm = 0.1738, lr_0 = 6.2661e-04
Loss = 2.2845e-02, PNorm = 124.9743, GNorm = 0.2491, lr_0 = 6.2618e-04
Loss = 2.2134e-02, PNorm = 125.0252, GNorm = 0.3089, lr_0 = 6.2575e-04
Loss = 2.4294e-02, PNorm = 125.0779, GNorm = 0.3765, lr_0 = 6.2532e-04
Loss = 2.4397e-02, PNorm = 125.1263, GNorm = 0.2781, lr_0 = 6.2489e-04
Loss = 2.1969e-02, PNorm = 125.1747, GNorm = 0.2275, lr_0 = 6.2446e-04
Loss = 2.3851e-02, PNorm = 125.2323, GNorm = 0.1965, lr_0 = 6.2403e-04
Loss = 1.8638e-02, PNorm = 125.2818, GNorm = 0.3211, lr_0 = 6.2361e-04
Loss = 2.1954e-02, PNorm = 125.3293, GNorm = 0.3566, lr_0 = 6.2318e-04
Loss = 2.2200e-02, PNorm = 125.3747, GNorm = 0.6720, lr_0 = 6.2275e-04
Loss = 2.0657e-02, PNorm = 125.4276, GNorm = 0.3622, lr_0 = 6.2233e-04
Loss = 2.4064e-02, PNorm = 125.4778, GNorm = 0.2872, lr_0 = 6.2190e-04
Loss = 2.0936e-02, PNorm = 125.5279, GNorm = 0.4775, lr_0 = 6.2147e-04
Loss = 2.5491e-02, PNorm = 125.5726, GNorm = 0.2228, lr_0 = 6.2105e-04
Loss = 1.8496e-02, PNorm = 125.6108, GNorm = 0.2623, lr_0 = 6.2062e-04
Loss = 2.0627e-02, PNorm = 125.6535, GNorm = 0.3686, lr_0 = 6.2020e-04
Loss = 3.1177e-02, PNorm = 125.6993, GNorm = 0.2976, lr_0 = 6.1977e-04
Loss = 2.4053e-02, PNorm = 125.7509, GNorm = 0.5035, lr_0 = 6.1935e-04
Loss = 2.0810e-02, PNorm = 125.8006, GNorm = 0.3431, lr_0 = 6.1892e-04
Loss = 2.2803e-02, PNorm = 125.8532, GNorm = 0.2538, lr_0 = 6.1850e-04
Loss = 2.1879e-02, PNorm = 125.9123, GNorm = 0.6308, lr_0 = 6.1808e-04
Loss = 2.0114e-02, PNorm = 125.9619, GNorm = 0.3611, lr_0 = 6.1765e-04
Loss = 2.3042e-02, PNorm = 126.0104, GNorm = 0.6011, lr_0 = 6.1723e-04
Loss = 2.6799e-02, PNorm = 126.0601, GNorm = 0.1654, lr_0 = 6.1681e-04
Loss = 2.4334e-02, PNorm = 126.1134, GNorm = 0.3786, lr_0 = 6.1638e-04
Loss = 2.4612e-02, PNorm = 126.1551, GNorm = 0.3135, lr_0 = 6.1596e-04
Loss = 2.3489e-02, PNorm = 126.2065, GNorm = 0.2814, lr_0 = 6.1554e-04
Loss = 2.4074e-02, PNorm = 126.2560, GNorm = 0.3590, lr_0 = 6.1512e-04
Loss = 2.2613e-02, PNorm = 126.3090, GNorm = 0.3744, lr_0 = 6.1470e-04
Loss = 2.3687e-02, PNorm = 126.3536, GNorm = 0.2347, lr_0 = 6.1428e-04
Loss = 2.4766e-02, PNorm = 126.4025, GNorm = 0.3857, lr_0 = 6.1385e-04
Loss = 2.1592e-02, PNorm = 126.4543, GNorm = 0.4434, lr_0 = 6.1343e-04
Loss = 3.1880e-02, PNorm = 126.5163, GNorm = 0.4520, lr_0 = 6.1301e-04
Loss = 2.6511e-02, PNorm = 126.5704, GNorm = 0.4265, lr_0 = 6.1259e-04
Loss = 2.6936e-02, PNorm = 126.6241, GNorm = 0.4652, lr_0 = 6.1217e-04
Loss = 2.1758e-02, PNorm = 126.6801, GNorm = 0.1711, lr_0 = 6.1175e-04
Loss = 2.4893e-02, PNorm = 126.7252, GNorm = 0.2109, lr_0 = 6.1134e-04
Loss = 2.5062e-02, PNorm = 126.7818, GNorm = 0.2536, lr_0 = 6.1092e-04
Loss = 2.6053e-02, PNorm = 126.8326, GNorm = 0.3238, lr_0 = 6.1050e-04
Validation mae = 0.489060
Epoch 8
Loss = 1.8978e-02, PNorm = 126.8830, GNorm = 0.6797, lr_0 = 6.1008e-04
Loss = 1.9801e-02, PNorm = 126.9205, GNorm = 0.2532, lr_0 = 6.0966e-04
Loss = 1.9400e-02, PNorm = 126.9584, GNorm = 0.8191, lr_0 = 6.0924e-04
Loss = 1.8704e-02, PNorm = 126.9928, GNorm = 0.2199, lr_0 = 6.0883e-04
Loss = 2.1780e-02, PNorm = 127.0220, GNorm = 0.6207, lr_0 = 6.0841e-04
Loss = 1.9162e-02, PNorm = 127.0529, GNorm = 0.2660, lr_0 = 6.0799e-04
Loss = 1.9559e-02, PNorm = 127.0913, GNorm = 0.2009, lr_0 = 6.0758e-04
Loss = 1.7260e-02, PNorm = 127.1246, GNorm = 0.3536, lr_0 = 6.0716e-04
Loss = 1.7972e-02, PNorm = 127.1556, GNorm = 0.5803, lr_0 = 6.0674e-04
Loss = 2.2652e-02, PNorm = 127.1873, GNorm = 0.3066, lr_0 = 6.0633e-04
Loss = 2.0411e-02, PNorm = 127.2234, GNorm = 0.4236, lr_0 = 6.0591e-04
Loss = 2.4321e-02, PNorm = 127.2631, GNorm = 0.3571, lr_0 = 6.0550e-04
Loss = 1.7713e-02, PNorm = 127.3037, GNorm = 0.3841, lr_0 = 6.0508e-04
Loss = 1.9417e-02, PNorm = 127.3358, GNorm = 0.5528, lr_0 = 6.0467e-04
Loss = 1.8545e-02, PNorm = 127.3709, GNorm = 0.2013, lr_0 = 6.0425e-04
Loss = 1.7178e-02, PNorm = 127.4037, GNorm = 0.2471, lr_0 = 6.0384e-04
Loss = 1.9649e-02, PNorm = 127.4385, GNorm = 0.6635, lr_0 = 6.0343e-04
Loss = 1.8056e-02, PNorm = 127.4687, GNorm = 0.3474, lr_0 = 6.0301e-04
Loss = 1.9793e-02, PNorm = 127.5027, GNorm = 0.4423, lr_0 = 6.0260e-04
Loss = 1.6486e-02, PNorm = 127.5417, GNorm = 0.3321, lr_0 = 6.0219e-04
Loss = 2.0999e-02, PNorm = 127.5767, GNorm = 0.5403, lr_0 = 6.0178e-04
Loss = 2.0280e-02, PNorm = 127.6139, GNorm = 0.2161, lr_0 = 6.0136e-04
Loss = 1.9662e-02, PNorm = 127.6553, GNorm = 0.2637, lr_0 = 6.0095e-04
Loss = 1.6067e-02, PNorm = 127.6946, GNorm = 0.3615, lr_0 = 6.0054e-04
Loss = 1.7994e-02, PNorm = 127.7301, GNorm = 0.1454, lr_0 = 6.0013e-04
Loss = 2.0105e-02, PNorm = 127.7611, GNorm = 0.4889, lr_0 = 5.9972e-04
Loss = 1.5765e-02, PNorm = 127.7974, GNorm = 0.1599, lr_0 = 5.9931e-04
Loss = 2.0826e-02, PNorm = 127.8286, GNorm = 0.3569, lr_0 = 5.9890e-04
Loss = 1.7025e-02, PNorm = 127.8603, GNorm = 0.3763, lr_0 = 5.9849e-04
Loss = 1.6388e-02, PNorm = 127.9002, GNorm = 0.4411, lr_0 = 5.9808e-04
Loss = 1.5545e-02, PNorm = 127.9338, GNorm = 0.1239, lr_0 = 5.9767e-04
Loss = 1.8480e-02, PNorm = 127.9748, GNorm = 0.2603, lr_0 = 5.9726e-04
Loss = 1.7113e-02, PNorm = 128.0084, GNorm = 0.7726, lr_0 = 5.9685e-04
Loss = 1.8189e-02, PNorm = 128.0490, GNorm = 0.3454, lr_0 = 5.9644e-04
Loss = 2.0611e-02, PNorm = 128.0897, GNorm = 0.6109, lr_0 = 5.9603e-04
Loss = 1.6966e-02, PNorm = 128.1328, GNorm = 0.4885, lr_0 = 5.9562e-04
Loss = 1.7051e-02, PNorm = 128.1662, GNorm = 0.4318, lr_0 = 5.9521e-04
Loss = 1.6765e-02, PNorm = 128.1993, GNorm = 0.2743, lr_0 = 5.9481e-04
Loss = 1.6458e-02, PNorm = 128.2310, GNorm = 0.7021, lr_0 = 5.9440e-04
Loss = 1.4896e-02, PNorm = 128.2645, GNorm = 0.3635, lr_0 = 5.9399e-04
Loss = 1.7465e-02, PNorm = 128.2923, GNorm = 0.2090, lr_0 = 5.9358e-04
Loss = 1.6818e-02, PNorm = 128.3275, GNorm = 0.3553, lr_0 = 5.9318e-04
Loss = 1.7137e-02, PNorm = 128.3621, GNorm = 0.6878, lr_0 = 5.9277e-04
Loss = 2.2127e-02, PNorm = 128.4027, GNorm = 0.2601, lr_0 = 5.9236e-04
Loss = 1.8956e-02, PNorm = 128.4416, GNorm = 0.1946, lr_0 = 5.9196e-04
Loss = 2.5084e-02, PNorm = 128.4759, GNorm = 0.2040, lr_0 = 5.9155e-04
Loss = 1.5225e-02, PNorm = 128.5170, GNorm = 0.4452, lr_0 = 5.9115e-04
Loss = 1.9108e-02, PNorm = 128.5550, GNorm = 0.5316, lr_0 = 5.9074e-04
Loss = 1.8372e-02, PNorm = 128.5947, GNorm = 0.7446, lr_0 = 5.9034e-04
Loss = 1.7292e-02, PNorm = 128.6318, GNorm = 0.3757, lr_0 = 5.8993e-04
Loss = 1.9079e-02, PNorm = 128.6702, GNorm = 0.6328, lr_0 = 5.8953e-04
Loss = 1.5848e-02, PNorm = 128.7106, GNorm = 0.5565, lr_0 = 5.8913e-04
Loss = 1.7238e-02, PNorm = 128.7489, GNorm = 0.3831, lr_0 = 5.8872e-04
Loss = 1.6883e-02, PNorm = 128.7931, GNorm = 0.1789, lr_0 = 5.8832e-04
Loss = 1.5788e-02, PNorm = 128.8368, GNorm = 0.1937, lr_0 = 5.8792e-04
Loss = 1.8989e-02, PNorm = 128.8671, GNorm = 0.5960, lr_0 = 5.8751e-04
Loss = 1.8877e-02, PNorm = 128.9038, GNorm = 0.2580, lr_0 = 5.8711e-04
Loss = 1.7413e-02, PNorm = 128.9368, GNorm = 0.2467, lr_0 = 5.8671e-04
Loss = 1.8803e-02, PNorm = 128.9727, GNorm = 0.1933, lr_0 = 5.8631e-04
Loss = 1.7489e-02, PNorm = 129.0117, GNorm = 0.2400, lr_0 = 5.8591e-04
Loss = 1.7959e-02, PNorm = 129.0508, GNorm = 0.4147, lr_0 = 5.8550e-04
Loss = 1.8108e-02, PNorm = 129.0919, GNorm = 0.2259, lr_0 = 5.8510e-04
Loss = 1.9269e-02, PNorm = 129.1327, GNorm = 0.2897, lr_0 = 5.8470e-04
Loss = 1.7929e-02, PNorm = 129.1721, GNorm = 0.3372, lr_0 = 5.8430e-04
Loss = 1.8211e-02, PNorm = 129.2098, GNorm = 0.4062, lr_0 = 5.8390e-04
Loss = 1.6484e-02, PNorm = 129.2480, GNorm = 0.6576, lr_0 = 5.8350e-04
Loss = 2.0807e-02, PNorm = 129.2932, GNorm = 0.7273, lr_0 = 5.8310e-04
Loss = 1.6333e-02, PNorm = 129.3333, GNorm = 0.2758, lr_0 = 5.8270e-04
Loss = 1.8339e-02, PNorm = 129.3698, GNorm = 0.4600, lr_0 = 5.8230e-04
Loss = 2.1311e-02, PNorm = 129.4132, GNorm = 0.2517, lr_0 = 5.8190e-04
Loss = 1.9270e-02, PNorm = 129.4547, GNorm = 0.5762, lr_0 = 5.8151e-04
Loss = 1.7303e-02, PNorm = 129.4925, GNorm = 0.3365, lr_0 = 5.8111e-04
Loss = 1.7114e-02, PNorm = 129.5333, GNorm = 0.4839, lr_0 = 5.8071e-04
Loss = 1.6150e-02, PNorm = 129.5775, GNorm = 0.4473, lr_0 = 5.8031e-04
Loss = 1.8249e-02, PNorm = 129.6135, GNorm = 0.3412, lr_0 = 5.7991e-04
Loss = 1.7696e-02, PNorm = 129.6511, GNorm = 0.5869, lr_0 = 5.7952e-04
Loss = 1.9055e-02, PNorm = 129.6900, GNorm = 0.2613, lr_0 = 5.7912e-04
Loss = 1.9157e-02, PNorm = 129.7269, GNorm = 0.5260, lr_0 = 5.7872e-04
Loss = 2.1204e-02, PNorm = 129.7705, GNorm = 0.4113, lr_0 = 5.7833e-04
Loss = 2.2396e-02, PNorm = 129.8109, GNorm = 0.4437, lr_0 = 5.7793e-04
Loss = 1.9382e-02, PNorm = 129.8502, GNorm = 0.4887, lr_0 = 5.7753e-04
Loss = 2.2819e-02, PNorm = 129.8887, GNorm = 0.5712, lr_0 = 5.7714e-04
Loss = 1.6079e-02, PNorm = 129.9326, GNorm = 0.3008, lr_0 = 5.7674e-04
Loss = 1.7159e-02, PNorm = 129.9723, GNorm = 0.4039, lr_0 = 5.7635e-04
Loss = 2.0809e-02, PNorm = 130.0144, GNorm = 0.1766, lr_0 = 5.7595e-04
Loss = 2.1020e-02, PNorm = 130.0592, GNorm = 0.5415, lr_0 = 5.7556e-04
Loss = 1.6479e-02, PNorm = 130.0946, GNorm = 0.3828, lr_0 = 5.7516e-04
Loss = 1.8720e-02, PNorm = 130.1369, GNorm = 0.3363, lr_0 = 5.7477e-04
Loss = 1.6923e-02, PNorm = 130.1796, GNorm = 0.7727, lr_0 = 5.7438e-04
Loss = 1.7375e-02, PNorm = 130.2182, GNorm = 0.6417, lr_0 = 5.7398e-04
Loss = 1.5908e-02, PNorm = 130.2572, GNorm = 0.1753, lr_0 = 5.7359e-04
Loss = 1.7205e-02, PNorm = 130.2930, GNorm = 0.2528, lr_0 = 5.7320e-04
Loss = 1.6474e-02, PNorm = 130.3347, GNorm = 0.1929, lr_0 = 5.7280e-04
Loss = 1.8113e-02, PNorm = 130.3705, GNorm = 0.2464, lr_0 = 5.7241e-04
Loss = 1.5651e-02, PNorm = 130.4052, GNorm = 0.6160, lr_0 = 5.7202e-04
Loss = 1.7197e-02, PNorm = 130.4476, GNorm = 0.1940, lr_0 = 5.7163e-04
Loss = 1.7555e-02, PNorm = 130.4903, GNorm = 0.4757, lr_0 = 5.7124e-04
Loss = 1.8884e-02, PNorm = 130.5335, GNorm = 0.7977, lr_0 = 5.7084e-04
Loss = 1.8674e-02, PNorm = 130.5697, GNorm = 0.1678, lr_0 = 5.7045e-04
Loss = 1.7621e-02, PNorm = 130.6090, GNorm = 0.1405, lr_0 = 5.7006e-04
Loss = 1.7157e-02, PNorm = 130.6459, GNorm = 0.2991, lr_0 = 5.6967e-04
Loss = 1.7823e-02, PNorm = 130.6839, GNorm = 0.4385, lr_0 = 5.6928e-04
Loss = 1.7340e-02, PNorm = 130.7217, GNorm = 0.4897, lr_0 = 5.6889e-04
Loss = 1.9904e-02, PNorm = 130.7627, GNorm = 0.4613, lr_0 = 5.6850e-04
Loss = 1.8402e-02, PNorm = 130.8055, GNorm = 0.2028, lr_0 = 5.6811e-04
Loss = 1.7591e-02, PNorm = 130.8537, GNorm = 0.6354, lr_0 = 5.6772e-04
Loss = 1.9620e-02, PNorm = 130.8994, GNorm = 0.2173, lr_0 = 5.6733e-04
Loss = 1.8397e-02, PNorm = 130.9425, GNorm = 0.2780, lr_0 = 5.6695e-04
Loss = 1.7332e-02, PNorm = 130.9765, GNorm = 0.3791, lr_0 = 5.6656e-04
Loss = 1.8405e-02, PNorm = 131.0131, GNorm = 0.4991, lr_0 = 5.6617e-04
Loss = 1.7594e-02, PNorm = 131.0527, GNorm = 0.4448, lr_0 = 5.6578e-04
Loss = 1.9506e-02, PNorm = 131.0963, GNorm = 0.9095, lr_0 = 5.6539e-04
Loss = 1.7106e-02, PNorm = 131.1415, GNorm = 0.1401, lr_0 = 5.6501e-04
Loss = 2.3318e-02, PNorm = 131.1852, GNorm = 0.2506, lr_0 = 5.6462e-04
Loss = 1.6633e-02, PNorm = 131.2288, GNorm = 0.3261, lr_0 = 5.6423e-04
Loss = 1.9815e-02, PNorm = 131.2709, GNorm = 0.4439, lr_0 = 5.6385e-04
Loss = 1.7409e-02, PNorm = 131.3154, GNorm = 0.1956, lr_0 = 5.6346e-04
Loss = 1.6290e-02, PNorm = 131.3530, GNorm = 0.5942, lr_0 = 5.6307e-04
Loss = 1.6948e-02, PNorm = 131.3894, GNorm = 0.2843, lr_0 = 5.6269e-04
Loss = 2.0177e-02, PNorm = 131.4249, GNorm = 0.5445, lr_0 = 5.6230e-04
Validation mae = 0.483990
Epoch 9
Loss = 1.5080e-02, PNorm = 131.4599, GNorm = 0.2512, lr_0 = 5.6192e-04
Loss = 1.7053e-02, PNorm = 131.4901, GNorm = 0.3629, lr_0 = 5.6153e-04
Loss = 1.4902e-02, PNorm = 131.5160, GNorm = 0.1745, lr_0 = 5.6115e-04
Loss = 1.8382e-02, PNorm = 131.5407, GNorm = 0.4595, lr_0 = 5.6076e-04
Loss = 1.8407e-02, PNorm = 131.5692, GNorm = 0.2217, lr_0 = 5.6038e-04
Loss = 1.5827e-02, PNorm = 131.6013, GNorm = 0.4131, lr_0 = 5.6000e-04
Loss = 1.4417e-02, PNorm = 131.6373, GNorm = 0.4148, lr_0 = 5.5961e-04
Loss = 1.7799e-02, PNorm = 131.6701, GNorm = 0.6677, lr_0 = 5.5923e-04
Loss = 1.5345e-02, PNorm = 131.6963, GNorm = 0.1866, lr_0 = 5.5885e-04
Loss = 1.6211e-02, PNorm = 131.7325, GNorm = 0.2896, lr_0 = 5.5846e-04
Loss = 1.4683e-02, PNorm = 131.7654, GNorm = 0.2309, lr_0 = 5.5808e-04
Loss = 1.3840e-02, PNorm = 131.7920, GNorm = 0.5486, lr_0 = 5.5770e-04
Loss = 1.4663e-02, PNorm = 131.8188, GNorm = 0.2388, lr_0 = 5.5732e-04
Loss = 1.6687e-02, PNorm = 131.8512, GNorm = 0.2734, lr_0 = 5.5693e-04
Loss = 1.5001e-02, PNorm = 131.8860, GNorm = 0.2236, lr_0 = 5.5655e-04
Loss = 1.3345e-02, PNorm = 131.9169, GNorm = 0.3790, lr_0 = 5.5617e-04
Loss = 1.4057e-02, PNorm = 131.9429, GNorm = 0.5895, lr_0 = 5.5579e-04
Loss = 1.5933e-02, PNorm = 131.9700, GNorm = 0.3860, lr_0 = 5.5541e-04
Loss = 1.2542e-02, PNorm = 132.0036, GNorm = 0.1236, lr_0 = 5.5503e-04
Loss = 1.6548e-02, PNorm = 132.0316, GNorm = 0.4356, lr_0 = 5.5465e-04
Loss = 1.3199e-02, PNorm = 132.0637, GNorm = 0.1898, lr_0 = 5.5427e-04
Loss = 1.3644e-02, PNorm = 132.0967, GNorm = 0.3019, lr_0 = 5.5389e-04
Loss = 1.6205e-02, PNorm = 132.1255, GNorm = 0.3422, lr_0 = 5.5351e-04
Loss = 1.6100e-02, PNorm = 132.1522, GNorm = 0.3098, lr_0 = 5.5313e-04
Loss = 1.3416e-02, PNorm = 132.1787, GNorm = 0.2677, lr_0 = 5.5275e-04
Loss = 1.2762e-02, PNorm = 132.2076, GNorm = 0.2117, lr_0 = 5.5237e-04
Loss = 1.5986e-02, PNorm = 132.2361, GNorm = 0.3934, lr_0 = 5.5199e-04
Loss = 1.5941e-02, PNorm = 132.2679, GNorm = 0.8072, lr_0 = 5.5162e-04
Loss = 2.0777e-02, PNorm = 132.3037, GNorm = 0.4313, lr_0 = 5.5124e-04
Loss = 1.3303e-02, PNorm = 132.3353, GNorm = 0.1279, lr_0 = 5.5086e-04
Loss = 1.5127e-02, PNorm = 132.3627, GNorm = 0.3362, lr_0 = 5.5048e-04
Loss = 1.3393e-02, PNorm = 132.3953, GNorm = 0.2166, lr_0 = 5.5011e-04
Loss = 1.6004e-02, PNorm = 132.4255, GNorm = 0.8068, lr_0 = 5.4973e-04
Loss = 1.5186e-02, PNorm = 132.4583, GNorm = 0.2177, lr_0 = 5.4935e-04
Loss = 1.4204e-02, PNorm = 132.4896, GNorm = 0.3938, lr_0 = 5.4898e-04
Loss = 1.3187e-02, PNorm = 132.5184, GNorm = 0.2084, lr_0 = 5.4860e-04
Loss = 1.2926e-02, PNorm = 132.5520, GNorm = 0.4543, lr_0 = 5.4822e-04
Loss = 1.4182e-02, PNorm = 132.5834, GNorm = 0.3785, lr_0 = 5.4785e-04
Loss = 1.3575e-02, PNorm = 132.6145, GNorm = 0.7617, lr_0 = 5.4747e-04
Loss = 1.4882e-02, PNorm = 132.6403, GNorm = 0.3241, lr_0 = 5.4710e-04
Loss = 1.5095e-02, PNorm = 132.6746, GNorm = 0.1351, lr_0 = 5.4672e-04
Loss = 1.6174e-02, PNorm = 132.7039, GNorm = 0.3142, lr_0 = 5.4635e-04
Loss = 1.5547e-02, PNorm = 132.7356, GNorm = 0.2674, lr_0 = 5.4597e-04
Loss = 1.3130e-02, PNorm = 132.7682, GNorm = 0.1636, lr_0 = 5.4560e-04
Loss = 1.4508e-02, PNorm = 132.7980, GNorm = 0.5824, lr_0 = 5.4523e-04
Loss = 1.3263e-02, PNorm = 132.8250, GNorm = 0.7635, lr_0 = 5.4485e-04
Loss = 1.3650e-02, PNorm = 132.8525, GNorm = 0.4711, lr_0 = 5.4448e-04
Loss = 1.9347e-02, PNorm = 132.8813, GNorm = 0.5883, lr_0 = 5.4411e-04
Loss = 1.5921e-02, PNorm = 132.9192, GNorm = 0.3012, lr_0 = 5.4373e-04
Loss = 1.5339e-02, PNorm = 132.9536, GNorm = 0.2488, lr_0 = 5.4336e-04
Loss = 1.3031e-02, PNorm = 132.9927, GNorm = 0.2606, lr_0 = 5.4299e-04
Loss = 1.4984e-02, PNorm = 133.0272, GNorm = 0.1764, lr_0 = 5.4262e-04
Loss = 1.3905e-02, PNorm = 133.0621, GNorm = 0.4652, lr_0 = 5.4225e-04
Loss = 1.7799e-02, PNorm = 133.0931, GNorm = 0.2992, lr_0 = 5.4187e-04
Loss = 1.2447e-02, PNorm = 133.1242, GNorm = 0.2372, lr_0 = 5.4150e-04
Loss = 1.5451e-02, PNorm = 133.1607, GNorm = 0.1972, lr_0 = 5.4113e-04
Loss = 1.3444e-02, PNorm = 133.1913, GNorm = 0.2350, lr_0 = 5.4076e-04
Loss = 1.6040e-02, PNorm = 133.2210, GNorm = 0.2313, lr_0 = 5.4039e-04
Loss = 1.6781e-02, PNorm = 133.2584, GNorm = 0.3383, lr_0 = 5.4002e-04
Loss = 1.3615e-02, PNorm = 133.2944, GNorm = 0.2686, lr_0 = 5.3965e-04
Loss = 1.4575e-02, PNorm = 133.3290, GNorm = 0.3788, lr_0 = 5.3928e-04
Loss = 1.6560e-02, PNorm = 133.3723, GNorm = 0.2201, lr_0 = 5.3891e-04
Loss = 1.7063e-02, PNorm = 133.4042, GNorm = 0.5377, lr_0 = 5.3854e-04
Loss = 1.4263e-02, PNorm = 133.4380, GNorm = 0.4101, lr_0 = 5.3817e-04
Loss = 1.5005e-02, PNorm = 133.4751, GNorm = 0.3372, lr_0 = 5.3781e-04
Loss = 1.2853e-02, PNorm = 133.5076, GNorm = 0.3489, lr_0 = 5.3744e-04
Loss = 1.3754e-02, PNorm = 133.5441, GNorm = 0.7010, lr_0 = 5.3707e-04
Loss = 1.4710e-02, PNorm = 133.5798, GNorm = 0.2935, lr_0 = 5.3670e-04
Loss = 1.3391e-02, PNorm = 133.6131, GNorm = 0.4665, lr_0 = 5.3633e-04
Loss = 1.6423e-02, PNorm = 133.6480, GNorm = 0.4742, lr_0 = 5.3597e-04
Loss = 1.5383e-02, PNorm = 133.6823, GNorm = 0.6793, lr_0 = 5.3560e-04
Loss = 1.4205e-02, PNorm = 133.7166, GNorm = 0.1172, lr_0 = 5.3523e-04
Loss = 1.5600e-02, PNorm = 133.7527, GNorm = 0.3981, lr_0 = 5.3486e-04
Loss = 1.5173e-02, PNorm = 133.7889, GNorm = 0.2554, lr_0 = 5.3450e-04
Loss = 1.4868e-02, PNorm = 133.8263, GNorm = 0.3748, lr_0 = 5.3413e-04
Loss = 1.5431e-02, PNorm = 133.8595, GNorm = 0.2514, lr_0 = 5.3377e-04
Loss = 1.4194e-02, PNorm = 133.8934, GNorm = 0.2856, lr_0 = 5.3340e-04
Loss = 1.4180e-02, PNorm = 133.9248, GNorm = 0.2161, lr_0 = 5.3304e-04
Loss = 1.4876e-02, PNorm = 133.9624, GNorm = 0.3172, lr_0 = 5.3267e-04
Loss = 1.5113e-02, PNorm = 133.9962, GNorm = 0.1869, lr_0 = 5.3231e-04
Loss = 1.6312e-02, PNorm = 134.0287, GNorm = 0.3641, lr_0 = 5.3194e-04
Loss = 1.5802e-02, PNorm = 134.0618, GNorm = 0.4829, lr_0 = 5.3158e-04
Loss = 1.4901e-02, PNorm = 134.1040, GNorm = 0.1585, lr_0 = 5.3121e-04
Loss = 1.7105e-02, PNorm = 134.1442, GNorm = 0.6289, lr_0 = 5.3085e-04
Loss = 2.1712e-02, PNorm = 134.1790, GNorm = 0.4446, lr_0 = 5.3048e-04
Loss = 1.7113e-02, PNorm = 134.2151, GNorm = 0.4230, lr_0 = 5.3012e-04
Loss = 1.5728e-02, PNorm = 134.2542, GNorm = 0.4842, lr_0 = 5.2976e-04
Loss = 1.6311e-02, PNorm = 134.2976, GNorm = 0.4153, lr_0 = 5.2939e-04
Loss = 1.4291e-02, PNorm = 134.3423, GNorm = 0.2366, lr_0 = 5.2903e-04
Loss = 1.6197e-02, PNorm = 134.3822, GNorm = 0.3316, lr_0 = 5.2867e-04
Loss = 1.4309e-02, PNorm = 134.4176, GNorm = 0.2889, lr_0 = 5.2831e-04
Loss = 1.3450e-02, PNorm = 134.4494, GNorm = 0.2854, lr_0 = 5.2795e-04
Loss = 1.4413e-02, PNorm = 134.4805, GNorm = 0.4832, lr_0 = 5.2758e-04
Loss = 1.3898e-02, PNorm = 134.5146, GNorm = 0.2948, lr_0 = 5.2722e-04
Loss = 1.5055e-02, PNorm = 134.5490, GNorm = 0.5201, lr_0 = 5.2686e-04
Loss = 1.5137e-02, PNorm = 134.5821, GNorm = 0.5820, lr_0 = 5.2650e-04
Loss = 1.4888e-02, PNorm = 134.6223, GNorm = 0.2072, lr_0 = 5.2614e-04
Loss = 1.3455e-02, PNorm = 134.6603, GNorm = 0.4504, lr_0 = 5.2578e-04
Loss = 1.4864e-02, PNorm = 134.6981, GNorm = 0.3314, lr_0 = 5.2542e-04
Loss = 1.5303e-02, PNorm = 134.7336, GNorm = 0.5860, lr_0 = 5.2506e-04
Loss = 1.8819e-02, PNorm = 134.7698, GNorm = 0.5926, lr_0 = 5.2470e-04
Loss = 1.6042e-02, PNorm = 134.8053, GNorm = 0.5086, lr_0 = 5.2434e-04
Loss = 1.7248e-02, PNorm = 134.8354, GNorm = 0.2726, lr_0 = 5.2398e-04
Loss = 1.5747e-02, PNorm = 134.8690, GNorm = 0.1284, lr_0 = 5.2362e-04
Loss = 1.1220e-02, PNorm = 134.9042, GNorm = 0.1957, lr_0 = 5.2326e-04
Loss = 1.6825e-02, PNorm = 134.9389, GNorm = 0.2615, lr_0 = 5.2290e-04
Loss = 1.4747e-02, PNorm = 134.9798, GNorm = 0.3182, lr_0 = 5.2255e-04
Loss = 1.7085e-02, PNorm = 135.0170, GNorm = 0.1548, lr_0 = 5.2219e-04
Loss = 1.2909e-02, PNorm = 135.0539, GNorm = 0.8781, lr_0 = 5.2183e-04
Loss = 1.5364e-02, PNorm = 135.0881, GNorm = 0.7012, lr_0 = 5.2147e-04
Loss = 1.6448e-02, PNorm = 135.1242, GNorm = 0.5700, lr_0 = 5.2112e-04
Loss = 1.8598e-02, PNorm = 135.1614, GNorm = 0.3044, lr_0 = 5.2076e-04
Loss = 1.3860e-02, PNorm = 135.1999, GNorm = 0.3339, lr_0 = 5.2040e-04
Loss = 1.8195e-02, PNorm = 135.2332, GNorm = 0.4636, lr_0 = 5.2005e-04
Loss = 1.7737e-02, PNorm = 135.2655, GNorm = 0.7754, lr_0 = 5.1969e-04
Loss = 1.4844e-02, PNorm = 135.2976, GNorm = 0.3942, lr_0 = 5.1933e-04
Loss = 1.3762e-02, PNorm = 135.3370, GNorm = 0.5447, lr_0 = 5.1898e-04
Loss = 1.5829e-02, PNorm = 135.3704, GNorm = 0.6287, lr_0 = 5.1862e-04
Loss = 1.7199e-02, PNorm = 135.4026, GNorm = 0.1191, lr_0 = 5.1827e-04
Loss = 1.8786e-02, PNorm = 135.4396, GNorm = 0.6846, lr_0 = 5.1791e-04
Validation mae = 0.487156
Epoch 10
Loss = 1.2902e-02, PNorm = 135.4669, GNorm = 0.1597, lr_0 = 5.1756e-04
Loss = 1.4589e-02, PNorm = 135.4877, GNorm = 0.3791, lr_0 = 5.1720e-04
Loss = 1.1200e-02, PNorm = 135.5097, GNorm = 0.3116, lr_0 = 5.1685e-04
Loss = 1.3373e-02, PNorm = 135.5384, GNorm = 0.1729, lr_0 = 5.1649e-04
Loss = 1.3387e-02, PNorm = 135.5661, GNorm = 0.2478, lr_0 = 5.1614e-04
Loss = 1.1600e-02, PNorm = 135.5958, GNorm = 0.6537, lr_0 = 5.1579e-04
Loss = 1.2558e-02, PNorm = 135.6231, GNorm = 0.8695, lr_0 = 5.1543e-04
Loss = 1.1333e-02, PNorm = 135.6439, GNorm = 0.2027, lr_0 = 5.1508e-04
Loss = 1.2407e-02, PNorm = 135.6682, GNorm = 0.5009, lr_0 = 5.1473e-04
Loss = 1.3875e-02, PNorm = 135.6939, GNorm = 0.3978, lr_0 = 5.1437e-04
Loss = 1.5171e-02, PNorm = 135.7195, GNorm = 0.1538, lr_0 = 5.1402e-04
Loss = 1.1630e-02, PNorm = 135.7432, GNorm = 0.3225, lr_0 = 5.1367e-04
Loss = 1.1944e-02, PNorm = 135.7688, GNorm = 0.2964, lr_0 = 5.1332e-04
Loss = 1.3112e-02, PNorm = 135.7973, GNorm = 0.3138, lr_0 = 5.1297e-04
Loss = 1.1013e-02, PNorm = 135.8261, GNorm = 0.3431, lr_0 = 5.1262e-04
Loss = 1.4050e-02, PNorm = 135.8525, GNorm = 0.1569, lr_0 = 5.1226e-04
Loss = 1.5337e-02, PNorm = 135.8763, GNorm = 0.5930, lr_0 = 5.1191e-04
Loss = 1.3703e-02, PNorm = 135.8984, GNorm = 0.3292, lr_0 = 5.1156e-04
Loss = 1.2365e-02, PNorm = 135.9231, GNorm = 0.1764, lr_0 = 5.1121e-04
Loss = 1.4858e-02, PNorm = 135.9445, GNorm = 0.2985, lr_0 = 5.1086e-04
Loss = 1.2793e-02, PNorm = 135.9716, GNorm = 0.3660, lr_0 = 5.1051e-04
Loss = 1.4128e-02, PNorm = 136.0014, GNorm = 0.3146, lr_0 = 5.1016e-04
Loss = 1.1618e-02, PNorm = 136.0291, GNorm = 0.4496, lr_0 = 5.0981e-04
Loss = 1.3371e-02, PNorm = 136.0554, GNorm = 0.5057, lr_0 = 5.0946e-04
Loss = 1.2943e-02, PNorm = 136.0784, GNorm = 0.2588, lr_0 = 5.0911e-04
Loss = 1.3775e-02, PNorm = 136.0994, GNorm = 0.3342, lr_0 = 5.0877e-04
Loss = 1.0786e-02, PNorm = 136.1252, GNorm = 0.3029, lr_0 = 5.0842e-04
Loss = 9.2500e-03, PNorm = 136.1504, GNorm = 0.1594, lr_0 = 5.0807e-04
Loss = 1.2772e-02, PNorm = 136.1735, GNorm = 0.1400, lr_0 = 5.0772e-04
Loss = 1.3410e-02, PNorm = 136.1974, GNorm = 0.1872, lr_0 = 5.0737e-04
Loss = 1.1305e-02, PNorm = 136.2228, GNorm = 0.3757, lr_0 = 5.0703e-04
Loss = 1.3668e-02, PNorm = 136.2497, GNorm = 0.1187, lr_0 = 5.0668e-04
Loss = 1.0937e-02, PNorm = 136.2752, GNorm = 0.1420, lr_0 = 5.0633e-04
Loss = 1.3545e-02, PNorm = 136.2993, GNorm = 0.2809, lr_0 = 5.0598e-04
Loss = 1.5945e-02, PNorm = 136.3319, GNorm = 0.4260, lr_0 = 5.0564e-04
Loss = 1.2406e-02, PNorm = 136.3612, GNorm = 0.3791, lr_0 = 5.0529e-04
Loss = 1.2432e-02, PNorm = 136.3908, GNorm = 0.3598, lr_0 = 5.0494e-04
Loss = 1.5411e-02, PNorm = 136.4207, GNorm = 0.2852, lr_0 = 5.0460e-04
Loss = 1.1926e-02, PNorm = 136.4496, GNorm = 0.5074, lr_0 = 5.0425e-04
Loss = 1.3644e-02, PNorm = 136.4824, GNorm = 0.2939, lr_0 = 5.0391e-04
Loss = 1.1356e-02, PNorm = 136.5093, GNorm = 0.2868, lr_0 = 5.0356e-04
Loss = 1.3012e-02, PNorm = 136.5357, GNorm = 0.3978, lr_0 = 5.0322e-04
Loss = 1.1569e-02, PNorm = 136.5648, GNorm = 0.3950, lr_0 = 5.0287e-04
Loss = 1.5350e-02, PNorm = 136.5906, GNorm = 0.2971, lr_0 = 5.0253e-04
Loss = 1.1301e-02, PNorm = 136.6171, GNorm = 0.2813, lr_0 = 5.0218e-04
Loss = 1.2327e-02, PNorm = 136.6480, GNorm = 0.3213, lr_0 = 5.0184e-04
Loss = 1.4114e-02, PNorm = 136.6736, GNorm = 0.1706, lr_0 = 5.0150e-04
Loss = 1.1509e-02, PNorm = 136.7024, GNorm = 0.2553, lr_0 = 5.0115e-04
Loss = 1.2276e-02, PNorm = 136.7309, GNorm = 0.3905, lr_0 = 5.0081e-04
Loss = 1.1444e-02, PNorm = 136.7638, GNorm = 0.3001, lr_0 = 5.0047e-04
Loss = 1.1545e-02, PNorm = 136.7930, GNorm = 0.1729, lr_0 = 5.0012e-04
Loss = 1.3587e-02, PNorm = 136.8162, GNorm = 0.3902, lr_0 = 4.9978e-04
Loss = 1.0685e-02, PNorm = 136.8395, GNorm = 0.2805, lr_0 = 4.9944e-04
Loss = 1.1766e-02, PNorm = 136.8656, GNorm = 0.2327, lr_0 = 4.9910e-04
Loss = 1.1686e-02, PNorm = 136.8959, GNorm = 0.2428, lr_0 = 4.9875e-04
Loss = 1.3114e-02, PNorm = 136.9282, GNorm = 0.4406, lr_0 = 4.9841e-04
Loss = 1.0990e-02, PNorm = 136.9584, GNorm = 0.9343, lr_0 = 4.9807e-04
Loss = 1.1681e-02, PNorm = 136.9879, GNorm = 0.4019, lr_0 = 4.9773e-04
Loss = 1.1658e-02, PNorm = 137.0179, GNorm = 0.1696, lr_0 = 4.9739e-04
Loss = 1.0964e-02, PNorm = 137.0492, GNorm = 0.3219, lr_0 = 4.9705e-04
Loss = 1.1463e-02, PNorm = 137.0733, GNorm = 0.2982, lr_0 = 4.9671e-04
Loss = 1.3611e-02, PNorm = 137.0969, GNorm = 0.4661, lr_0 = 4.9637e-04
Loss = 1.0487e-02, PNorm = 137.1266, GNorm = 0.2834, lr_0 = 4.9603e-04
Loss = 1.2587e-02, PNorm = 137.1501, GNorm = 0.2185, lr_0 = 4.9569e-04
Loss = 1.1342e-02, PNorm = 137.1762, GNorm = 0.1908, lr_0 = 4.9535e-04
Loss = 1.3695e-02, PNorm = 137.2076, GNorm = 0.1237, lr_0 = 4.9501e-04
Loss = 1.1164e-02, PNorm = 137.2361, GNorm = 0.4725, lr_0 = 4.9467e-04
Loss = 1.0428e-02, PNorm = 137.2656, GNorm = 0.4299, lr_0 = 4.9433e-04
Loss = 1.2966e-02, PNorm = 137.2951, GNorm = 0.5885, lr_0 = 4.9399e-04
Loss = 1.1382e-02, PNorm = 137.3203, GNorm = 0.4009, lr_0 = 4.9365e-04
Loss = 9.8847e-03, PNorm = 137.3415, GNorm = 0.2911, lr_0 = 4.9332e-04
Loss = 1.1919e-02, PNorm = 137.3666, GNorm = 0.2270, lr_0 = 4.9298e-04
Loss = 1.2236e-02, PNorm = 137.3928, GNorm = 0.3598, lr_0 = 4.9264e-04
Loss = 1.4322e-02, PNorm = 137.4243, GNorm = 0.3707, lr_0 = 4.9230e-04
Loss = 1.0256e-02, PNorm = 137.4539, GNorm = 0.2963, lr_0 = 4.9197e-04
Loss = 1.1964e-02, PNorm = 137.4839, GNorm = 0.2122, lr_0 = 4.9163e-04
Loss = 1.2260e-02, PNorm = 137.5115, GNorm = 0.1745, lr_0 = 4.9129e-04
Loss = 1.4042e-02, PNorm = 137.5396, GNorm = 0.3303, lr_0 = 4.9095e-04
Loss = 1.1894e-02, PNorm = 137.5629, GNorm = 0.2843, lr_0 = 4.9062e-04
Loss = 1.1605e-02, PNorm = 137.5856, GNorm = 0.6030, lr_0 = 4.9028e-04
Loss = 1.1418e-02, PNorm = 137.6118, GNorm = 0.2974, lr_0 = 4.8995e-04
Loss = 1.3255e-02, PNorm = 137.6462, GNorm = 0.2402, lr_0 = 4.8961e-04
Loss = 1.4505e-02, PNorm = 137.6775, GNorm = 0.1544, lr_0 = 4.8928e-04
Loss = 1.1427e-02, PNorm = 137.7084, GNorm = 0.4463, lr_0 = 4.8894e-04
Loss = 1.4177e-02, PNorm = 137.7412, GNorm = 0.2371, lr_0 = 4.8861e-04
Loss = 1.3290e-02, PNorm = 137.7706, GNorm = 0.4668, lr_0 = 4.8827e-04
Loss = 1.2301e-02, PNorm = 137.8036, GNorm = 0.1678, lr_0 = 4.8794e-04
Loss = 1.3179e-02, PNorm = 137.8370, GNorm = 0.2266, lr_0 = 4.8760e-04
Loss = 1.1555e-02, PNorm = 137.8733, GNorm = 0.2139, lr_0 = 4.8727e-04
Loss = 1.1162e-02, PNorm = 137.9041, GNorm = 0.2579, lr_0 = 4.8693e-04
Loss = 1.3117e-02, PNorm = 137.9268, GNorm = 0.3355, lr_0 = 4.8660e-04
Loss = 1.3213e-02, PNorm = 137.9542, GNorm = 0.1971, lr_0 = 4.8627e-04
Loss = 1.2752e-02, PNorm = 137.9810, GNorm = 0.3558, lr_0 = 4.8593e-04
Loss = 1.1083e-02, PNorm = 138.0127, GNorm = 0.5736, lr_0 = 4.8560e-04
Loss = 1.4523e-02, PNorm = 138.0425, GNorm = 0.3130, lr_0 = 4.8527e-04
Loss = 1.3150e-02, PNorm = 138.0692, GNorm = 0.1433, lr_0 = 4.8494e-04
Loss = 1.3712e-02, PNorm = 138.0930, GNorm = 0.3785, lr_0 = 4.8460e-04
Loss = 1.1272e-02, PNorm = 138.1212, GNorm = 0.1443, lr_0 = 4.8427e-04
Loss = 1.1055e-02, PNorm = 138.1480, GNorm = 0.1187, lr_0 = 4.8394e-04
Loss = 9.5293e-03, PNorm = 138.1762, GNorm = 0.1923, lr_0 = 4.8361e-04
Loss = 1.2399e-02, PNorm = 138.2022, GNorm = 0.4308, lr_0 = 4.8328e-04
Loss = 1.5283e-02, PNorm = 138.2263, GNorm = 0.1843, lr_0 = 4.8295e-04
Loss = 1.3088e-02, PNorm = 138.2596, GNorm = 0.3999, lr_0 = 4.8262e-04
Loss = 1.1616e-02, PNorm = 138.2957, GNorm = 0.3709, lr_0 = 4.8228e-04
Loss = 1.5112e-02, PNorm = 138.3283, GNorm = 0.6198, lr_0 = 4.8195e-04
Loss = 1.3713e-02, PNorm = 138.3601, GNorm = 0.3410, lr_0 = 4.8162e-04
Loss = 1.3356e-02, PNorm = 138.3938, GNorm = 0.3794, lr_0 = 4.8129e-04
Loss = 1.3079e-02, PNorm = 138.4268, GNorm = 0.6016, lr_0 = 4.8096e-04
Loss = 1.1537e-02, PNorm = 138.4537, GNorm = 0.3593, lr_0 = 4.8064e-04
Loss = 2.0424e-02, PNorm = 138.4839, GNorm = 0.7560, lr_0 = 4.8031e-04
Loss = 1.1518e-02, PNorm = 138.5166, GNorm = 0.2486, lr_0 = 4.7998e-04
Loss = 1.2671e-02, PNorm = 138.5536, GNorm = 0.5145, lr_0 = 4.7965e-04
Loss = 1.3180e-02, PNorm = 138.5835, GNorm = 0.2814, lr_0 = 4.7932e-04
Loss = 1.5270e-02, PNorm = 138.6167, GNorm = 0.3581, lr_0 = 4.7899e-04
Loss = 1.0575e-02, PNorm = 138.6474, GNorm = 0.1104, lr_0 = 4.7866e-04
Loss = 1.1490e-02, PNorm = 138.6752, GNorm = 0.4800, lr_0 = 4.7833e-04
Loss = 1.1110e-02, PNorm = 138.7050, GNorm = 0.1499, lr_0 = 4.7801e-04
Loss = 1.3880e-02, PNorm = 138.7444, GNorm = 0.1321, lr_0 = 4.7768e-04
Loss = 1.3228e-02, PNorm = 138.7782, GNorm = 0.1187, lr_0 = 4.7735e-04
Loss = 1.2533e-02, PNorm = 138.8069, GNorm = 0.2159, lr_0 = 4.7703e-04
Validation mae = 0.481772
Epoch 11
Loss = 1.0630e-02, PNorm = 138.8328, GNorm = 0.2170, lr_0 = 4.7670e-04
Loss = 1.3486e-02, PNorm = 138.8486, GNorm = 0.1285, lr_0 = 4.7637e-04
Loss = 1.0520e-02, PNorm = 138.8655, GNorm = 0.2241, lr_0 = 4.7605e-04
Loss = 1.0231e-02, PNorm = 138.8853, GNorm = 0.2231, lr_0 = 4.7572e-04
Loss = 1.0344e-02, PNorm = 138.9017, GNorm = 0.1388, lr_0 = 4.7539e-04
Loss = 1.0244e-02, PNorm = 138.9194, GNorm = 0.4007, lr_0 = 4.7507e-04
Loss = 1.1314e-02, PNorm = 138.9395, GNorm = 0.2217, lr_0 = 4.7474e-04
Loss = 1.1146e-02, PNorm = 138.9624, GNorm = 0.5863, lr_0 = 4.7442e-04
Loss = 9.5416e-03, PNorm = 138.9843, GNorm = 0.1706, lr_0 = 4.7409e-04
Loss = 1.0322e-02, PNorm = 139.0011, GNorm = 0.2523, lr_0 = 4.7377e-04
Loss = 9.4034e-03, PNorm = 139.0198, GNorm = 0.3843, lr_0 = 4.7344e-04
Loss = 1.2029e-02, PNorm = 139.0358, GNorm = 0.5931, lr_0 = 4.7312e-04
Loss = 1.3829e-02, PNorm = 139.0556, GNorm = 0.1437, lr_0 = 4.7279e-04
Loss = 1.1822e-02, PNorm = 139.0842, GNorm = 0.4078, lr_0 = 4.7247e-04
Loss = 1.2330e-02, PNorm = 139.1052, GNorm = 0.1212, lr_0 = 4.7215e-04
Loss = 9.6231e-03, PNorm = 139.1316, GNorm = 0.2826, lr_0 = 4.7182e-04
Loss = 1.2260e-02, PNorm = 139.1533, GNorm = 0.6640, lr_0 = 4.7150e-04
Loss = 1.0303e-02, PNorm = 139.1724, GNorm = 0.2434, lr_0 = 4.7118e-04
Loss = 1.0292e-02, PNorm = 139.1937, GNorm = 0.3429, lr_0 = 4.7085e-04
Loss = 9.8358e-03, PNorm = 139.2154, GNorm = 0.2502, lr_0 = 4.7053e-04
Loss = 7.6942e-03, PNorm = 139.2381, GNorm = 0.1566, lr_0 = 4.7021e-04
Loss = 1.0310e-02, PNorm = 139.2598, GNorm = 0.3694, lr_0 = 4.6989e-04
Loss = 9.7076e-03, PNorm = 139.2798, GNorm = 0.3605, lr_0 = 4.6957e-04
Loss = 8.7546e-03, PNorm = 139.3013, GNorm = 0.2941, lr_0 = 4.6924e-04
Loss = 8.8639e-03, PNorm = 139.3227, GNorm = 0.3176, lr_0 = 4.6892e-04
Loss = 8.9408e-03, PNorm = 139.3429, GNorm = 0.1918, lr_0 = 4.6860e-04
Loss = 1.0963e-02, PNorm = 139.3631, GNorm = 0.1650, lr_0 = 4.6828e-04
Loss = 1.0747e-02, PNorm = 139.3866, GNorm = 0.3392, lr_0 = 4.6796e-04
Loss = 1.0557e-02, PNorm = 139.4070, GNorm = 0.3140, lr_0 = 4.6764e-04
Loss = 1.0032e-02, PNorm = 139.4285, GNorm = 0.2189, lr_0 = 4.6732e-04
Loss = 8.8898e-03, PNorm = 139.4447, GNorm = 0.1258, lr_0 = 4.6700e-04
Loss = 1.0636e-02, PNorm = 139.4614, GNorm = 0.4588, lr_0 = 4.6668e-04
Loss = 8.3189e-03, PNorm = 139.4802, GNorm = 0.2624, lr_0 = 4.6636e-04
Loss = 8.8900e-03, PNorm = 139.5005, GNorm = 0.4560, lr_0 = 4.6604e-04
Loss = 9.1825e-03, PNorm = 139.5212, GNorm = 0.2390, lr_0 = 4.6572e-04
Loss = 1.0089e-02, PNorm = 139.5435, GNorm = 0.2824, lr_0 = 4.6540e-04
Loss = 9.3567e-03, PNorm = 139.5599, GNorm = 0.1833, lr_0 = 4.6508e-04
Loss = 1.2109e-02, PNorm = 139.5825, GNorm = 0.4160, lr_0 = 4.6476e-04
Loss = 1.2369e-02, PNorm = 139.6025, GNorm = 0.6104, lr_0 = 4.6445e-04
Loss = 1.0002e-02, PNorm = 139.6279, GNorm = 0.3110, lr_0 = 4.6413e-04
Loss = 7.5384e-03, PNorm = 139.6507, GNorm = 0.4936, lr_0 = 4.6381e-04
Loss = 1.0046e-02, PNorm = 139.6701, GNorm = 0.3413, lr_0 = 4.6349e-04
Loss = 9.9185e-03, PNorm = 139.6895, GNorm = 0.1188, lr_0 = 4.6317e-04
Loss = 9.4405e-03, PNorm = 139.7108, GNorm = 0.3463, lr_0 = 4.6286e-04
Loss = 9.4858e-03, PNorm = 139.7359, GNorm = 0.2934, lr_0 = 4.6254e-04
Loss = 9.8248e-03, PNorm = 139.7577, GNorm = 0.1683, lr_0 = 4.6222e-04
Loss = 1.0858e-02, PNorm = 139.7836, GNorm = 0.2177, lr_0 = 4.6191e-04
Loss = 9.1810e-03, PNorm = 139.8011, GNorm = 0.3662, lr_0 = 4.6159e-04
Loss = 9.7766e-03, PNorm = 139.8206, GNorm = 0.2054, lr_0 = 4.6127e-04
Loss = 1.3879e-02, PNorm = 139.8383, GNorm = 0.2478, lr_0 = 4.6096e-04
Loss = 1.0910e-02, PNorm = 139.8580, GNorm = 0.4907, lr_0 = 4.6064e-04
Loss = 1.2148e-02, PNorm = 139.8810, GNorm = 0.7181, lr_0 = 4.6033e-04
Loss = 9.7779e-03, PNorm = 139.9089, GNorm = 0.2422, lr_0 = 4.6001e-04
Loss = 1.0301e-02, PNorm = 139.9362, GNorm = 0.1523, lr_0 = 4.5970e-04
Loss = 1.0585e-02, PNorm = 139.9641, GNorm = 0.1175, lr_0 = 4.5938e-04
Loss = 9.0309e-03, PNorm = 139.9915, GNorm = 0.3200, lr_0 = 4.5907e-04
Loss = 9.0330e-03, PNorm = 140.0140, GNorm = 0.1354, lr_0 = 4.5875e-04
Loss = 1.0575e-02, PNorm = 140.0330, GNorm = 0.1900, lr_0 = 4.5844e-04
Loss = 7.8152e-03, PNorm = 140.0521, GNorm = 0.2374, lr_0 = 4.5812e-04
Loss = 9.8686e-03, PNorm = 140.0716, GNorm = 0.3085, lr_0 = 4.5781e-04
Loss = 1.0653e-02, PNorm = 140.0994, GNorm = 0.1884, lr_0 = 4.5750e-04
Loss = 9.4182e-03, PNorm = 140.1222, GNorm = 0.4613, lr_0 = 4.5718e-04
Loss = 1.0191e-02, PNorm = 140.1478, GNorm = 0.2163, lr_0 = 4.5687e-04
Loss = 9.9669e-03, PNorm = 140.1763, GNorm = 0.4472, lr_0 = 4.5656e-04
Loss = 9.8886e-03, PNorm = 140.1982, GNorm = 0.4466, lr_0 = 4.5624e-04
Loss = 9.2504e-03, PNorm = 140.2227, GNorm = 0.2147, lr_0 = 4.5593e-04
Loss = 1.0064e-02, PNorm = 140.2436, GNorm = 0.3467, lr_0 = 4.5562e-04
Loss = 1.1345e-02, PNorm = 140.2649, GNorm = 0.4417, lr_0 = 4.5531e-04
Loss = 1.0767e-02, PNorm = 140.2857, GNorm = 0.1188, lr_0 = 4.5499e-04
Loss = 1.0657e-02, PNorm = 140.3114, GNorm = 0.1767, lr_0 = 4.5468e-04
Loss = 9.8083e-03, PNorm = 140.3438, GNorm = 0.2938, lr_0 = 4.5437e-04
Loss = 1.0438e-02, PNorm = 140.3722, GNorm = 0.4447, lr_0 = 4.5406e-04
Loss = 1.0324e-02, PNorm = 140.3952, GNorm = 0.2216, lr_0 = 4.5375e-04
Loss = 1.5368e-02, PNorm = 140.4202, GNorm = 0.3424, lr_0 = 4.5344e-04
Loss = 9.9297e-03, PNorm = 140.4517, GNorm = 0.2291, lr_0 = 4.5313e-04
Loss = 1.0044e-02, PNorm = 140.4824, GNorm = 0.1249, lr_0 = 4.5282e-04
Loss = 1.1961e-02, PNorm = 140.5047, GNorm = 0.4626, lr_0 = 4.5251e-04
Loss = 1.0199e-02, PNorm = 140.5271, GNorm = 0.6455, lr_0 = 4.5220e-04
Loss = 8.5218e-03, PNorm = 140.5497, GNorm = 0.4849, lr_0 = 4.5189e-04
Loss = 1.0845e-02, PNorm = 140.5790, GNorm = 0.3095, lr_0 = 4.5158e-04
Loss = 1.1416e-02, PNorm = 140.6056, GNorm = 0.1491, lr_0 = 4.5127e-04
Loss = 9.6441e-03, PNorm = 140.6308, GNorm = 0.1330, lr_0 = 4.5096e-04
Loss = 1.0064e-02, PNorm = 140.6550, GNorm = 0.1570, lr_0 = 4.5065e-04
Loss = 1.4765e-02, PNorm = 140.6772, GNorm = 0.2360, lr_0 = 4.5034e-04
Loss = 8.4746e-03, PNorm = 140.7033, GNorm = 0.1232, lr_0 = 4.5003e-04
Loss = 9.6231e-03, PNorm = 140.7314, GNorm = 0.4866, lr_0 = 4.4972e-04
Loss = 9.1058e-03, PNorm = 140.7603, GNorm = 0.0932, lr_0 = 4.4942e-04
Loss = 1.0016e-02, PNorm = 140.7885, GNorm = 0.0983, lr_0 = 4.4911e-04
Loss = 1.0945e-02, PNorm = 140.8148, GNorm = 0.3649, lr_0 = 4.4880e-04
Loss = 9.3222e-03, PNorm = 140.8424, GNorm = 0.1720, lr_0 = 4.4849e-04
Loss = 1.0951e-02, PNorm = 140.8652, GNorm = 0.3667, lr_0 = 4.4819e-04
Loss = 1.0293e-02, PNorm = 140.8927, GNorm = 0.2519, lr_0 = 4.4788e-04
Loss = 1.0156e-02, PNorm = 140.9168, GNorm = 0.4604, lr_0 = 4.4757e-04
Loss = 1.0264e-02, PNorm = 140.9415, GNorm = 0.1834, lr_0 = 4.4727e-04
Loss = 9.7662e-03, PNorm = 140.9618, GNorm = 0.4090, lr_0 = 4.4696e-04
Loss = 9.0989e-03, PNorm = 140.9861, GNorm = 0.3853, lr_0 = 4.4665e-04
Loss = 9.5251e-03, PNorm = 141.0096, GNorm = 0.3810, lr_0 = 4.4635e-04
Loss = 1.0715e-02, PNorm = 141.0366, GNorm = 0.2805, lr_0 = 4.4604e-04
Loss = 1.1100e-02, PNorm = 141.0614, GNorm = 0.3455, lr_0 = 4.4574e-04
Loss = 1.1409e-02, PNorm = 141.0870, GNorm = 0.3133, lr_0 = 4.4543e-04
Loss = 1.4178e-02, PNorm = 141.1194, GNorm = 0.1967, lr_0 = 4.4513e-04
Loss = 9.5452e-03, PNorm = 141.1463, GNorm = 0.4306, lr_0 = 4.4482e-04
Loss = 1.0050e-02, PNorm = 141.1707, GNorm = 0.2717, lr_0 = 4.4452e-04
Loss = 1.1158e-02, PNorm = 141.1917, GNorm = 0.3312, lr_0 = 4.4421e-04
Loss = 1.2377e-02, PNorm = 141.2154, GNorm = 0.3042, lr_0 = 4.4391e-04
Loss = 8.9061e-03, PNorm = 141.2414, GNorm = 0.2151, lr_0 = 4.4360e-04
Loss = 9.6197e-03, PNorm = 141.2686, GNorm = 0.3616, lr_0 = 4.4330e-04
Loss = 8.5968e-03, PNorm = 141.2940, GNorm = 0.2475, lr_0 = 4.4299e-04
Loss = 9.6761e-03, PNorm = 141.3163, GNorm = 0.2655, lr_0 = 4.4269e-04
Loss = 1.0176e-02, PNorm = 141.3388, GNorm = 0.2720, lr_0 = 4.4239e-04
Loss = 1.1743e-02, PNorm = 141.3597, GNorm = 0.2156, lr_0 = 4.4209e-04
Loss = 1.0476e-02, PNorm = 141.3826, GNorm = 0.1920, lr_0 = 4.4178e-04
Loss = 1.0393e-02, PNorm = 141.4082, GNorm = 0.2132, lr_0 = 4.4148e-04
Loss = 8.1023e-03, PNorm = 141.4330, GNorm = 0.2520, lr_0 = 4.4118e-04
Loss = 1.0095e-02, PNorm = 141.4521, GNorm = 0.4479, lr_0 = 4.4088e-04
Loss = 9.1360e-03, PNorm = 141.4764, GNorm = 0.3002, lr_0 = 4.4057e-04
Loss = 1.2201e-02, PNorm = 141.5052, GNorm = 0.3586, lr_0 = 4.4027e-04
Loss = 1.3411e-02, PNorm = 141.5292, GNorm = 0.2356, lr_0 = 4.3997e-04
Loss = 1.0279e-02, PNorm = 141.5507, GNorm = 0.1964, lr_0 = 4.3967e-04
Loss = 1.0138e-02, PNorm = 141.5764, GNorm = 0.3239, lr_0 = 4.3937e-04
Validation mae = 0.482952
Epoch 12
Loss = 8.8105e-03, PNorm = 141.5974, GNorm = 0.2994, lr_0 = 4.3907e-04
Loss = 8.3545e-03, PNorm = 141.6109, GNorm = 0.2222, lr_0 = 4.3877e-04
Loss = 9.1599e-03, PNorm = 141.6295, GNorm = 0.2791, lr_0 = 4.3846e-04
Loss = 9.3790e-03, PNorm = 141.6455, GNorm = 0.1777, lr_0 = 4.3816e-04
Loss = 8.9908e-03, PNorm = 141.6628, GNorm = 0.7100, lr_0 = 4.3786e-04
Loss = 8.4516e-03, PNorm = 141.6808, GNorm = 0.2705, lr_0 = 4.3756e-04
Loss = 8.1887e-03, PNorm = 141.7017, GNorm = 0.2415, lr_0 = 4.3726e-04
Loss = 1.1043e-02, PNorm = 141.7217, GNorm = 0.4261, lr_0 = 4.3696e-04
Loss = 7.7852e-03, PNorm = 141.7464, GNorm = 0.7224, lr_0 = 4.3667e-04
Loss = 9.8211e-03, PNorm = 141.7657, GNorm = 0.2069, lr_0 = 4.3637e-04
Loss = 7.6681e-03, PNorm = 141.7807, GNorm = 0.1658, lr_0 = 4.3607e-04
Loss = 8.6706e-03, PNorm = 141.7966, GNorm = 0.4014, lr_0 = 4.3577e-04
Loss = 8.5337e-03, PNorm = 141.8169, GNorm = 0.1192, lr_0 = 4.3547e-04
Loss = 8.3727e-03, PNorm = 141.8384, GNorm = 0.1774, lr_0 = 4.3517e-04
Loss = 9.4642e-03, PNorm = 141.8596, GNorm = 0.3200, lr_0 = 4.3487e-04
Loss = 1.1819e-02, PNorm = 141.8782, GNorm = 0.2875, lr_0 = 4.3458e-04
Loss = 9.1775e-03, PNorm = 141.8947, GNorm = 0.3833, lr_0 = 4.3428e-04
Loss = 7.6219e-03, PNorm = 141.9125, GNorm = 0.2521, lr_0 = 4.3398e-04
Loss = 8.4496e-03, PNorm = 141.9287, GNorm = 0.0982, lr_0 = 4.3368e-04
Loss = 7.3552e-03, PNorm = 141.9460, GNorm = 0.2339, lr_0 = 4.3339e-04
Loss = 9.8367e-03, PNorm = 141.9607, GNorm = 0.2568, lr_0 = 4.3309e-04
Loss = 7.3832e-03, PNorm = 141.9790, GNorm = 0.2568, lr_0 = 4.3279e-04
Loss = 6.2423e-03, PNorm = 141.9935, GNorm = 0.2154, lr_0 = 4.3250e-04
Loss = 9.5154e-03, PNorm = 142.0095, GNorm = 0.0960, lr_0 = 4.3220e-04
Loss = 7.3327e-03, PNorm = 142.0276, GNorm = 0.1138, lr_0 = 4.3190e-04
Loss = 7.6455e-03, PNorm = 142.0377, GNorm = 0.2363, lr_0 = 4.3161e-04
Loss = 8.3570e-03, PNorm = 142.0527, GNorm = 0.2914, lr_0 = 4.3131e-04
Loss = 7.5723e-03, PNorm = 142.0662, GNorm = 0.2021, lr_0 = 4.3102e-04
Loss = 1.1738e-02, PNorm = 142.0819, GNorm = 0.3513, lr_0 = 4.3072e-04
Loss = 8.0721e-03, PNorm = 142.0987, GNorm = 0.1264, lr_0 = 4.3043e-04
Loss = 8.2337e-03, PNorm = 142.1176, GNorm = 0.1522, lr_0 = 4.3013e-04
Loss = 9.0388e-03, PNorm = 142.1338, GNorm = 0.3552, lr_0 = 4.2984e-04
Loss = 7.8144e-03, PNorm = 142.1533, GNorm = 0.2699, lr_0 = 4.2954e-04
Loss = 6.7746e-03, PNorm = 142.1751, GNorm = 0.1581, lr_0 = 4.2925e-04
Loss = 7.2921e-03, PNorm = 142.1917, GNorm = 0.1406, lr_0 = 4.2895e-04
Loss = 7.3288e-03, PNorm = 142.2118, GNorm = 0.1711, lr_0 = 4.2866e-04
Loss = 6.8134e-03, PNorm = 142.2328, GNorm = 0.2403, lr_0 = 4.2837e-04
Loss = 8.6233e-03, PNorm = 142.2477, GNorm = 0.2004, lr_0 = 4.2807e-04
Loss = 1.0005e-02, PNorm = 142.2679, GNorm = 0.4158, lr_0 = 4.2778e-04
Loss = 9.6051e-03, PNorm = 142.2901, GNorm = 0.4857, lr_0 = 4.2749e-04
Loss = 1.0789e-02, PNorm = 142.3108, GNorm = 0.4209, lr_0 = 4.2719e-04
Loss = 9.2240e-03, PNorm = 142.3282, GNorm = 0.4342, lr_0 = 4.2690e-04
Loss = 8.1771e-03, PNorm = 142.3415, GNorm = 0.3764, lr_0 = 4.2661e-04
Loss = 7.7559e-03, PNorm = 142.3653, GNorm = 0.2279, lr_0 = 4.2632e-04
Loss = 8.0138e-03, PNorm = 142.3860, GNorm = 0.3430, lr_0 = 4.2602e-04
Loss = 1.1786e-02, PNorm = 142.4067, GNorm = 0.3153, lr_0 = 4.2573e-04
Loss = 8.5195e-03, PNorm = 142.4215, GNorm = 0.5175, lr_0 = 4.2544e-04
Loss = 7.2744e-03, PNorm = 142.4390, GNorm = 0.1215, lr_0 = 4.2515e-04
Loss = 7.3783e-03, PNorm = 142.4592, GNorm = 0.2234, lr_0 = 4.2486e-04
Loss = 7.5388e-03, PNorm = 142.4800, GNorm = 0.2997, lr_0 = 4.2457e-04
Loss = 1.0256e-02, PNorm = 142.4993, GNorm = 0.2590, lr_0 = 4.2428e-04
Loss = 7.2358e-03, PNorm = 142.5174, GNorm = 0.1781, lr_0 = 4.2399e-04
Loss = 9.5044e-03, PNorm = 142.5338, GNorm = 0.3553, lr_0 = 4.2370e-04
Loss = 7.8027e-03, PNorm = 142.5526, GNorm = 0.1688, lr_0 = 4.2340e-04
Loss = 7.1966e-03, PNorm = 142.5702, GNorm = 0.2894, lr_0 = 4.2311e-04
Loss = 8.9865e-03, PNorm = 142.5885, GNorm = 0.1054, lr_0 = 4.2283e-04
Loss = 7.7569e-03, PNorm = 142.6130, GNorm = 0.2523, lr_0 = 4.2254e-04
Loss = 9.6829e-03, PNorm = 142.6309, GNorm = 0.5429, lr_0 = 4.2225e-04
Loss = 9.2387e-03, PNorm = 142.6490, GNorm = 0.1378, lr_0 = 4.2196e-04
Loss = 6.6837e-03, PNorm = 142.6680, GNorm = 0.3209, lr_0 = 4.2167e-04
Loss = 8.3503e-03, PNorm = 142.6844, GNorm = 0.5249, lr_0 = 4.2138e-04
Loss = 1.0506e-02, PNorm = 142.7017, GNorm = 0.3153, lr_0 = 4.2109e-04
Loss = 7.8590e-03, PNorm = 142.7245, GNorm = 0.3364, lr_0 = 4.2080e-04
Loss = 8.0335e-03, PNorm = 142.7468, GNorm = 0.2028, lr_0 = 4.2051e-04
Loss = 7.2658e-03, PNorm = 142.7650, GNorm = 0.1997, lr_0 = 4.2023e-04
Loss = 8.6187e-03, PNorm = 142.7811, GNorm = 0.2954, lr_0 = 4.1994e-04
Loss = 8.4404e-03, PNorm = 142.7980, GNorm = 0.1525, lr_0 = 4.1965e-04
Loss = 9.0499e-03, PNorm = 142.8133, GNorm = 0.1811, lr_0 = 4.1936e-04
Loss = 8.3752e-03, PNorm = 142.8282, GNorm = 0.2817, lr_0 = 4.1907e-04
Loss = 8.1500e-03, PNorm = 142.8450, GNorm = 0.1742, lr_0 = 4.1879e-04
Loss = 1.1926e-02, PNorm = 142.8666, GNorm = 0.4284, lr_0 = 4.1850e-04
Loss = 8.9868e-03, PNorm = 142.8865, GNorm = 0.4354, lr_0 = 4.1821e-04
Loss = 8.9947e-03, PNorm = 142.9091, GNorm = 0.3375, lr_0 = 4.1793e-04
Loss = 9.7755e-03, PNorm = 142.9309, GNorm = 0.3742, lr_0 = 4.1764e-04
Loss = 9.5433e-03, PNorm = 142.9514, GNorm = 0.2860, lr_0 = 4.1736e-04
Loss = 1.0231e-02, PNorm = 142.9782, GNorm = 0.3118, lr_0 = 4.1707e-04
Loss = 9.4042e-03, PNorm = 143.0023, GNorm = 0.1019, lr_0 = 4.1678e-04
Loss = 7.0744e-03, PNorm = 143.0197, GNorm = 0.1392, lr_0 = 4.1650e-04
Loss = 8.6265e-03, PNorm = 143.0390, GNorm = 0.5205, lr_0 = 4.1621e-04
Loss = 8.1812e-03, PNorm = 143.0586, GNorm = 0.2368, lr_0 = 4.1593e-04
Loss = 1.0687e-02, PNorm = 143.0773, GNorm = 0.3178, lr_0 = 4.1564e-04
Loss = 7.5251e-03, PNorm = 143.0981, GNorm = 0.2564, lr_0 = 4.1536e-04
Loss = 8.1623e-03, PNorm = 143.1185, GNorm = 0.2056, lr_0 = 4.1507e-04
Loss = 7.5658e-03, PNorm = 143.1396, GNorm = 0.1535, lr_0 = 4.1479e-04
Loss = 1.0046e-02, PNorm = 143.1606, GNorm = 0.1429, lr_0 = 4.1450e-04
Loss = 9.7816e-03, PNorm = 143.1808, GNorm = 0.1394, lr_0 = 4.1422e-04
Loss = 8.7428e-03, PNorm = 143.2027, GNorm = 0.1282, lr_0 = 4.1394e-04
Loss = 8.3462e-03, PNorm = 143.2237, GNorm = 0.1510, lr_0 = 4.1365e-04
Loss = 8.8754e-03, PNorm = 143.2446, GNorm = 0.1390, lr_0 = 4.1337e-04
Loss = 1.0061e-02, PNorm = 143.2654, GNorm = 0.1252, lr_0 = 4.1309e-04
Loss = 8.7807e-03, PNorm = 143.2878, GNorm = 0.2377, lr_0 = 4.1280e-04
Loss = 1.0626e-02, PNorm = 143.3116, GNorm = 0.4455, lr_0 = 4.1252e-04
Loss = 8.4373e-03, PNorm = 143.3337, GNorm = 0.2165, lr_0 = 4.1224e-04
Loss = 8.1353e-03, PNorm = 143.3564, GNorm = 0.3259, lr_0 = 4.1196e-04
Loss = 8.4776e-03, PNorm = 143.3780, GNorm = 0.3042, lr_0 = 4.1167e-04
Loss = 1.0013e-02, PNorm = 143.3991, GNorm = 0.1371, lr_0 = 4.1139e-04
Loss = 1.0361e-02, PNorm = 143.4198, GNorm = 0.1814, lr_0 = 4.1111e-04
Loss = 9.5765e-03, PNorm = 143.4395, GNorm = 0.3108, lr_0 = 4.1083e-04
Loss = 7.7589e-03, PNorm = 143.4551, GNorm = 0.4923, lr_0 = 4.1055e-04
Loss = 9.5760e-03, PNorm = 143.4742, GNorm = 0.3250, lr_0 = 4.1027e-04
Loss = 9.4294e-03, PNorm = 143.4955, GNorm = 0.1783, lr_0 = 4.0998e-04
Loss = 9.9811e-03, PNorm = 143.5160, GNorm = 0.1792, lr_0 = 4.0970e-04
Loss = 8.0225e-03, PNorm = 143.5389, GNorm = 0.3729, lr_0 = 4.0942e-04
Loss = 8.0838e-03, PNorm = 143.5579, GNorm = 0.1852, lr_0 = 4.0914e-04
Loss = 9.5604e-03, PNorm = 143.5747, GNorm = 0.2387, lr_0 = 4.0886e-04
Loss = 6.7113e-03, PNorm = 143.5922, GNorm = 0.4402, lr_0 = 4.0858e-04
Loss = 1.0724e-02, PNorm = 143.6106, GNorm = 0.7749, lr_0 = 4.0830e-04
Loss = 7.1849e-03, PNorm = 143.6308, GNorm = 0.2425, lr_0 = 4.0802e-04
Loss = 8.0770e-03, PNorm = 143.6476, GNorm = 0.2641, lr_0 = 4.0774e-04
Loss = 1.0025e-02, PNorm = 143.6654, GNorm = 0.3279, lr_0 = 4.0746e-04
Loss = 7.9095e-03, PNorm = 143.6869, GNorm = 0.1168, lr_0 = 4.0718e-04
Loss = 8.4703e-03, PNorm = 143.7091, GNorm = 0.2026, lr_0 = 4.0691e-04
Loss = 9.4540e-03, PNorm = 143.7286, GNorm = 0.1109, lr_0 = 4.0663e-04
Loss = 7.1936e-03, PNorm = 143.7523, GNorm = 0.2612, lr_0 = 4.0635e-04
Loss = 8.2066e-03, PNorm = 143.7697, GNorm = 0.3922, lr_0 = 4.0607e-04
Loss = 7.9369e-03, PNorm = 143.7857, GNorm = 0.2000, lr_0 = 4.0579e-04
Loss = 7.0698e-03, PNorm = 143.8063, GNorm = 0.2932, lr_0 = 4.0551e-04
Loss = 9.6848e-03, PNorm = 143.8215, GNorm = 0.3725, lr_0 = 4.0524e-04
Loss = 8.3711e-03, PNorm = 143.8458, GNorm = 0.1492, lr_0 = 4.0496e-04
Loss = 7.9769e-03, PNorm = 143.8703, GNorm = 0.1475, lr_0 = 4.0468e-04
Validation mae = 0.480337
Epoch 13
Loss = 7.0295e-03, PNorm = 143.8886, GNorm = 0.0675, lr_0 = 4.0440e-04
Loss = 6.3310e-03, PNorm = 143.8987, GNorm = 0.2169, lr_0 = 4.0413e-04
Loss = 8.5734e-03, PNorm = 143.9107, GNorm = 0.2216, lr_0 = 4.0385e-04
Loss = 7.6319e-03, PNorm = 143.9269, GNorm = 0.2057, lr_0 = 4.0357e-04
Loss = 6.5560e-03, PNorm = 143.9410, GNorm = 0.1858, lr_0 = 4.0330e-04
Loss = 7.1036e-03, PNorm = 143.9534, GNorm = 0.1958, lr_0 = 4.0302e-04
Loss = 6.3346e-03, PNorm = 143.9674, GNorm = 0.2655, lr_0 = 4.0274e-04
Loss = 8.2668e-03, PNorm = 143.9829, GNorm = 0.1072, lr_0 = 4.0247e-04
Loss = 6.0058e-03, PNorm = 143.9975, GNorm = 0.2066, lr_0 = 4.0219e-04
Loss = 7.8146e-03, PNorm = 144.0173, GNorm = 0.1237, lr_0 = 4.0192e-04
Loss = 6.3554e-03, PNorm = 144.0347, GNorm = 0.1433, lr_0 = 4.0164e-04
Loss = 7.8072e-03, PNorm = 144.0496, GNorm = 0.1776, lr_0 = 4.0137e-04
Loss = 7.6087e-03, PNorm = 144.0631, GNorm = 0.2324, lr_0 = 4.0109e-04
Loss = 7.6524e-03, PNorm = 144.0755, GNorm = 0.5134, lr_0 = 4.0082e-04
Loss = 6.3967e-03, PNorm = 144.0932, GNorm = 0.2077, lr_0 = 4.0054e-04
Loss = 6.5619e-03, PNorm = 144.1095, GNorm = 0.2131, lr_0 = 4.0027e-04
Loss = 6.1782e-03, PNorm = 144.1209, GNorm = 0.1175, lr_0 = 3.9999e-04
Loss = 7.0867e-03, PNorm = 144.1352, GNorm = 0.1246, lr_0 = 3.9972e-04
Loss = 6.6326e-03, PNorm = 144.1444, GNorm = 0.3272, lr_0 = 3.9945e-04
Loss = 5.9953e-03, PNorm = 144.1598, GNorm = 0.2007, lr_0 = 3.9917e-04
Loss = 7.5948e-03, PNorm = 144.1785, GNorm = 0.3711, lr_0 = 3.9890e-04
Loss = 7.3540e-03, PNorm = 144.1940, GNorm = 0.2242, lr_0 = 3.9863e-04
Loss = 6.1541e-03, PNorm = 144.2069, GNorm = 0.0931, lr_0 = 3.9835e-04
Loss = 7.7109e-03, PNorm = 144.2220, GNorm = 0.4405, lr_0 = 3.9808e-04
Loss = 6.1146e-03, PNorm = 144.2357, GNorm = 0.2491, lr_0 = 3.9781e-04
Loss = 5.8158e-03, PNorm = 144.2476, GNorm = 0.1955, lr_0 = 3.9753e-04
Loss = 7.7871e-03, PNorm = 144.2569, GNorm = 0.2199, lr_0 = 3.9726e-04
Loss = 7.9976e-03, PNorm = 144.2684, GNorm = 0.1962, lr_0 = 3.9699e-04
Loss = 6.7127e-03, PNorm = 144.2820, GNorm = 0.2641, lr_0 = 3.9672e-04
Loss = 6.1000e-03, PNorm = 144.2960, GNorm = 0.1577, lr_0 = 3.9645e-04
Loss = 5.7143e-03, PNorm = 144.3111, GNorm = 0.1348, lr_0 = 3.9617e-04
Loss = 7.3771e-03, PNorm = 144.3233, GNorm = 0.1434, lr_0 = 3.9590e-04
Loss = 6.0694e-03, PNorm = 144.3361, GNorm = 0.1738, lr_0 = 3.9563e-04
Loss = 7.9359e-03, PNorm = 144.3514, GNorm = 0.1818, lr_0 = 3.9536e-04
Loss = 7.9815e-03, PNorm = 144.3690, GNorm = 0.1683, lr_0 = 3.9509e-04
Loss = 7.8155e-03, PNorm = 144.3857, GNorm = 0.2297, lr_0 = 3.9482e-04
Loss = 8.0469e-03, PNorm = 144.3955, GNorm = 0.4883, lr_0 = 3.9455e-04
Loss = 6.8040e-03, PNorm = 144.4109, GNorm = 0.3348, lr_0 = 3.9428e-04
Loss = 7.1977e-03, PNorm = 144.4291, GNorm = 0.3266, lr_0 = 3.9401e-04
Loss = 8.7180e-03, PNorm = 144.4515, GNorm = 0.2326, lr_0 = 3.9374e-04
Loss = 5.7475e-03, PNorm = 144.4716, GNorm = 0.4689, lr_0 = 3.9347e-04
Loss = 7.5831e-03, PNorm = 144.4900, GNorm = 0.4776, lr_0 = 3.9320e-04
Loss = 7.7837e-03, PNorm = 144.5080, GNorm = 0.2678, lr_0 = 3.9293e-04
Loss = 7.0994e-03, PNorm = 144.5253, GNorm = 0.1112, lr_0 = 3.9266e-04
Loss = 6.5514e-03, PNorm = 144.5384, GNorm = 0.2905, lr_0 = 3.9239e-04
Loss = 7.8492e-03, PNorm = 144.5549, GNorm = 0.7337, lr_0 = 3.9212e-04
Loss = 9.7780e-03, PNorm = 144.5678, GNorm = 0.5183, lr_0 = 3.9185e-04
Loss = 7.2368e-03, PNorm = 144.5833, GNorm = 0.4763, lr_0 = 3.9159e-04
Loss = 6.9261e-03, PNorm = 144.6018, GNorm = 0.2471, lr_0 = 3.9132e-04
Loss = 1.0126e-02, PNorm = 144.6202, GNorm = 0.2302, lr_0 = 3.9105e-04
Loss = 6.4378e-03, PNorm = 144.6368, GNorm = 0.3382, lr_0 = 3.9078e-04
Loss = 9.8907e-03, PNorm = 144.6543, GNorm = 0.1098, lr_0 = 3.9051e-04
Loss = 8.2722e-03, PNorm = 144.6663, GNorm = 0.1762, lr_0 = 3.9025e-04
Loss = 9.0682e-03, PNorm = 144.6840, GNorm = 0.1219, lr_0 = 3.8998e-04
Loss = 9.1509e-03, PNorm = 144.7008, GNorm = 0.2612, lr_0 = 3.8971e-04
Loss = 7.8812e-03, PNorm = 144.7168, GNorm = 0.4389, lr_0 = 3.8945e-04
Loss = 6.8610e-03, PNorm = 144.7382, GNorm = 0.2380, lr_0 = 3.8918e-04
Loss = 6.5701e-03, PNorm = 144.7507, GNorm = 0.1136, lr_0 = 3.8891e-04
Loss = 6.2532e-03, PNorm = 144.7630, GNorm = 0.2593, lr_0 = 3.8865e-04
Loss = 6.6721e-03, PNorm = 144.7794, GNorm = 0.3385, lr_0 = 3.8838e-04
Loss = 5.8307e-03, PNorm = 144.7978, GNorm = 0.2680, lr_0 = 3.8811e-04
Loss = 6.9107e-03, PNorm = 144.8177, GNorm = 0.4574, lr_0 = 3.8785e-04
Loss = 9.9842e-03, PNorm = 144.8296, GNorm = 0.3199, lr_0 = 3.8758e-04
Loss = 8.6298e-03, PNorm = 144.8493, GNorm = 0.3871, lr_0 = 3.8732e-04
Loss = 7.9036e-03, PNorm = 144.8672, GNorm = 0.0813, lr_0 = 3.8705e-04
Loss = 6.2136e-03, PNorm = 144.8833, GNorm = 0.3587, lr_0 = 3.8679e-04
Loss = 6.7243e-03, PNorm = 144.9029, GNorm = 0.2520, lr_0 = 3.8652e-04
Loss = 7.5328e-03, PNorm = 144.9250, GNorm = 0.3413, lr_0 = 3.8626e-04
Loss = 7.3423e-03, PNorm = 144.9441, GNorm = 0.2596, lr_0 = 3.8599e-04
Loss = 8.8962e-03, PNorm = 144.9608, GNorm = 0.3985, lr_0 = 3.8573e-04
Loss = 9.7665e-03, PNorm = 144.9790, GNorm = 0.5481, lr_0 = 3.8546e-04
Loss = 6.1819e-03, PNorm = 144.9966, GNorm = 0.2551, lr_0 = 3.8520e-04
Loss = 7.5006e-03, PNorm = 145.0128, GNorm = 0.2977, lr_0 = 3.8493e-04
Loss = 8.1955e-03, PNorm = 145.0302, GNorm = 0.5754, lr_0 = 3.8467e-04
Loss = 8.5437e-03, PNorm = 145.0481, GNorm = 0.1534, lr_0 = 3.8441e-04
Loss = 6.3242e-03, PNorm = 145.0593, GNorm = 0.2502, lr_0 = 3.8414e-04
Loss = 6.6893e-03, PNorm = 145.0754, GNorm = 0.0949, lr_0 = 3.8388e-04
Loss = 6.6376e-03, PNorm = 145.0921, GNorm = 0.1529, lr_0 = 3.8362e-04
Loss = 6.5306e-03, PNorm = 145.1105, GNorm = 0.1270, lr_0 = 3.8336e-04
Loss = 6.6935e-03, PNorm = 145.1265, GNorm = 0.3165, lr_0 = 3.8309e-04
Loss = 7.6931e-03, PNorm = 145.1427, GNorm = 0.5940, lr_0 = 3.8283e-04
Loss = 6.9894e-03, PNorm = 145.1610, GNorm = 0.4399, lr_0 = 3.8257e-04
Loss = 6.0900e-03, PNorm = 145.1780, GNorm = 0.3211, lr_0 = 3.8231e-04
Loss = 5.4636e-03, PNorm = 145.1909, GNorm = 0.0907, lr_0 = 3.8204e-04
Loss = 6.1079e-03, PNorm = 145.2066, GNorm = 0.2610, lr_0 = 3.8178e-04
Loss = 7.8607e-03, PNorm = 145.2256, GNorm = 0.2532, lr_0 = 3.8152e-04
Loss = 7.0966e-03, PNorm = 145.2387, GNorm = 0.2119, lr_0 = 3.8126e-04
Loss = 7.0331e-03, PNorm = 145.2563, GNorm = 0.3794, lr_0 = 3.8100e-04
Loss = 7.8607e-03, PNorm = 145.2750, GNorm = 0.4576, lr_0 = 3.8074e-04
Loss = 6.6866e-03, PNorm = 145.2924, GNorm = 0.3572, lr_0 = 3.8048e-04
Loss = 8.0882e-03, PNorm = 145.3089, GNorm = 0.3232, lr_0 = 3.8022e-04
Loss = 7.0376e-03, PNorm = 145.3252, GNorm = 0.1967, lr_0 = 3.7995e-04
Loss = 8.0258e-03, PNorm = 145.3426, GNorm = 0.4991, lr_0 = 3.7969e-04
Loss = 7.7474e-03, PNorm = 145.3632, GNorm = 0.3918, lr_0 = 3.7943e-04
Loss = 8.1742e-03, PNorm = 145.3827, GNorm = 0.1820, lr_0 = 3.7917e-04
Loss = 6.0391e-03, PNorm = 145.4045, GNorm = 0.2459, lr_0 = 3.7891e-04
Loss = 8.2550e-03, PNorm = 145.4226, GNorm = 0.1856, lr_0 = 3.7866e-04
Loss = 6.6288e-03, PNorm = 145.4391, GNorm = 0.1789, lr_0 = 3.7840e-04
Loss = 8.3031e-03, PNorm = 145.4583, GNorm = 0.1656, lr_0 = 3.7814e-04
Loss = 7.8793e-03, PNorm = 145.4787, GNorm = 0.2081, lr_0 = 3.7788e-04
Loss = 7.3537e-03, PNorm = 145.4990, GNorm = 0.1476, lr_0 = 3.7762e-04
Loss = 7.9172e-03, PNorm = 145.5155, GNorm = 0.2111, lr_0 = 3.7736e-04
Loss = 6.7515e-03, PNorm = 145.5339, GNorm = 0.1901, lr_0 = 3.7710e-04
Loss = 7.8243e-03, PNorm = 145.5511, GNorm = 0.0735, lr_0 = 3.7684e-04
Loss = 6.5209e-03, PNorm = 145.5654, GNorm = 0.7931, lr_0 = 3.7659e-04
Loss = 6.5159e-03, PNorm = 145.5783, GNorm = 0.1628, lr_0 = 3.7633e-04
Loss = 6.5833e-03, PNorm = 145.5948, GNorm = 0.1123, lr_0 = 3.7607e-04
Loss = 7.1447e-03, PNorm = 145.6097, GNorm = 0.0676, lr_0 = 3.7581e-04
Loss = 1.0443e-02, PNorm = 145.6236, GNorm = 0.1489, lr_0 = 3.7555e-04
Loss = 9.2012e-03, PNorm = 145.6405, GNorm = 0.1929, lr_0 = 3.7530e-04
Loss = 7.7215e-03, PNorm = 145.6618, GNorm = 0.1577, lr_0 = 3.7504e-04
Loss = 6.1656e-03, PNorm = 145.6811, GNorm = 0.1233, lr_0 = 3.7478e-04
Loss = 6.2682e-03, PNorm = 145.6971, GNorm = 0.1350, lr_0 = 3.7453e-04
Loss = 9.7727e-03, PNorm = 145.7108, GNorm = 0.1405, lr_0 = 3.7427e-04
Loss = 8.1341e-03, PNorm = 145.7258, GNorm = 0.2376, lr_0 = 3.7401e-04
Loss = 7.7577e-03, PNorm = 145.7484, GNorm = 0.3596, lr_0 = 3.7376e-04
Loss = 7.3804e-03, PNorm = 145.7704, GNorm = 0.4451, lr_0 = 3.7350e-04
Loss = 6.1415e-03, PNorm = 145.7887, GNorm = 0.1034, lr_0 = 3.7325e-04
Loss = 5.8770e-03, PNorm = 145.8055, GNorm = 0.1392, lr_0 = 3.7299e-04
Loss = 7.0275e-03, PNorm = 145.8240, GNorm = 0.0942, lr_0 = 3.7273e-04
Validation mae = 0.478099
Epoch 14
Loss = 6.4961e-03, PNorm = 145.8335, GNorm = 0.3276, lr_0 = 3.7248e-04
Loss = 6.8071e-03, PNorm = 145.8419, GNorm = 0.0725, lr_0 = 3.7222e-04
Loss = 7.5754e-03, PNorm = 145.8536, GNorm = 0.3088, lr_0 = 3.7197e-04
Loss = 5.6611e-03, PNorm = 145.8653, GNorm = 0.1395, lr_0 = 3.7171e-04
Loss = 6.0133e-03, PNorm = 145.8780, GNorm = 0.1735, lr_0 = 3.7146e-04
Loss = 6.7249e-03, PNorm = 145.8924, GNorm = 0.4057, lr_0 = 3.7120e-04
Loss = 7.3890e-03, PNorm = 145.9024, GNorm = 0.1576, lr_0 = 3.7095e-04
Loss = 6.6486e-03, PNorm = 145.9135, GNorm = 0.2566, lr_0 = 3.7070e-04
Loss = 5.6416e-03, PNorm = 145.9273, GNorm = 0.2754, lr_0 = 3.7044e-04
Loss = 6.4958e-03, PNorm = 145.9400, GNorm = 0.2056, lr_0 = 3.7019e-04
Loss = 5.9284e-03, PNorm = 145.9504, GNorm = 0.2915, lr_0 = 3.6993e-04
Loss = 5.1560e-03, PNorm = 145.9637, GNorm = 0.4069, lr_0 = 3.6968e-04
Loss = 6.0274e-03, PNorm = 145.9740, GNorm = 0.2876, lr_0 = 3.6943e-04
Loss = 6.2291e-03, PNorm = 145.9822, GNorm = 0.1301, lr_0 = 3.6917e-04
Loss = 5.7870e-03, PNorm = 145.9953, GNorm = 0.2626, lr_0 = 3.6892e-04
Loss = 6.9690e-03, PNorm = 146.0054, GNorm = 0.1439, lr_0 = 3.6867e-04
Loss = 6.3804e-03, PNorm = 146.0162, GNorm = 0.2067, lr_0 = 3.6842e-04
Loss = 6.7755e-03, PNorm = 146.0321, GNorm = 0.6599, lr_0 = 3.6816e-04
Loss = 5.1231e-03, PNorm = 146.0484, GNorm = 0.1465, lr_0 = 3.6791e-04
Loss = 4.9972e-03, PNorm = 146.0595, GNorm = 0.2811, lr_0 = 3.6766e-04
Loss = 5.9825e-03, PNorm = 146.0722, GNorm = 0.2312, lr_0 = 3.6741e-04
Loss = 6.1982e-03, PNorm = 146.0847, GNorm = 0.2360, lr_0 = 3.6716e-04
Loss = 6.2203e-03, PNorm = 146.0989, GNorm = 0.1927, lr_0 = 3.6690e-04
Loss = 7.1475e-03, PNorm = 146.1128, GNorm = 0.1334, lr_0 = 3.6665e-04
Loss = 5.8612e-03, PNorm = 146.1257, GNorm = 0.0901, lr_0 = 3.6640e-04
Loss = 6.6063e-03, PNorm = 146.1389, GNorm = 0.1340, lr_0 = 3.6615e-04
Loss = 6.7021e-03, PNorm = 146.1514, GNorm = 0.3654, lr_0 = 3.6590e-04
Loss = 4.7511e-03, PNorm = 146.1651, GNorm = 0.0632, lr_0 = 3.6565e-04
Loss = 5.4634e-03, PNorm = 146.1784, GNorm = 0.1041, lr_0 = 3.6540e-04
Loss = 5.9857e-03, PNorm = 146.1912, GNorm = 0.0983, lr_0 = 3.6515e-04
Loss = 5.3014e-03, PNorm = 146.2008, GNorm = 0.1540, lr_0 = 3.6490e-04
Loss = 5.9876e-03, PNorm = 146.2161, GNorm = 0.1517, lr_0 = 3.6465e-04
Loss = 6.7047e-03, PNorm = 146.2281, GNorm = 0.2573, lr_0 = 3.6440e-04
Loss = 6.1048e-03, PNorm = 146.2434, GNorm = 0.0919, lr_0 = 3.6415e-04
Loss = 7.1128e-03, PNorm = 146.2575, GNorm = 0.1660, lr_0 = 3.6390e-04
Loss = 5.8052e-03, PNorm = 146.2673, GNorm = 0.0652, lr_0 = 3.6365e-04
Loss = 6.5406e-03, PNorm = 146.2820, GNorm = 0.4877, lr_0 = 3.6340e-04
Loss = 7.0196e-03, PNorm = 146.2992, GNorm = 0.2769, lr_0 = 3.6315e-04
Loss = 5.7470e-03, PNorm = 146.3146, GNorm = 0.1454, lr_0 = 3.6290e-04
Loss = 6.6562e-03, PNorm = 146.3246, GNorm = 0.1054, lr_0 = 3.6266e-04
Loss = 6.1837e-03, PNorm = 146.3371, GNorm = 0.1536, lr_0 = 3.6241e-04
Loss = 5.7052e-03, PNorm = 146.3479, GNorm = 0.1644, lr_0 = 3.6216e-04
Loss = 6.1296e-03, PNorm = 146.3581, GNorm = 0.1758, lr_0 = 3.6191e-04
Loss = 5.3189e-03, PNorm = 146.3700, GNorm = 0.1553, lr_0 = 3.6166e-04
Loss = 5.6098e-03, PNorm = 146.3834, GNorm = 0.0997, lr_0 = 3.6141e-04
Loss = 6.3931e-03, PNorm = 146.3957, GNorm = 0.1807, lr_0 = 3.6117e-04
Loss = 8.0044e-03, PNorm = 146.4066, GNorm = 0.3844, lr_0 = 3.6092e-04
Loss = 5.5039e-03, PNorm = 146.4187, GNorm = 0.3282, lr_0 = 3.6067e-04
Loss = 5.0341e-03, PNorm = 146.4305, GNorm = 0.2168, lr_0 = 3.6043e-04
Loss = 6.5663e-03, PNorm = 146.4461, GNorm = 0.5358, lr_0 = 3.6018e-04
Loss = 5.2859e-03, PNorm = 146.4666, GNorm = 0.2340, lr_0 = 3.5993e-04
Loss = 6.2324e-03, PNorm = 146.4821, GNorm = 0.2127, lr_0 = 3.5969e-04
Loss = 7.5190e-03, PNorm = 146.4957, GNorm = 0.2325, lr_0 = 3.5944e-04
Loss = 6.0040e-03, PNorm = 146.5091, GNorm = 0.2907, lr_0 = 3.5919e-04
Loss = 7.6815e-03, PNorm = 146.5234, GNorm = 0.4309, lr_0 = 3.5895e-04
Loss = 6.9521e-03, PNorm = 146.5373, GNorm = 0.1690, lr_0 = 3.5870e-04
Loss = 7.1695e-03, PNorm = 146.5511, GNorm = 0.1940, lr_0 = 3.5845e-04
Loss = 6.6188e-03, PNorm = 146.5680, GNorm = 0.1427, lr_0 = 3.5821e-04
Loss = 5.6459e-03, PNorm = 146.5862, GNorm = 0.3379, lr_0 = 3.5796e-04
Loss = 5.5476e-03, PNorm = 146.5994, GNorm = 0.2329, lr_0 = 3.5772e-04
Loss = 7.3301e-03, PNorm = 146.6097, GNorm = 0.1438, lr_0 = 3.5747e-04
Loss = 5.3541e-03, PNorm = 146.6209, GNorm = 0.0920, lr_0 = 3.5723e-04
Loss = 6.5412e-03, PNorm = 146.6376, GNorm = 0.3393, lr_0 = 3.5698e-04
Loss = 5.8421e-03, PNorm = 146.6539, GNorm = 0.3729, lr_0 = 3.5674e-04
Loss = 4.7192e-03, PNorm = 146.6688, GNorm = 0.1710, lr_0 = 3.5650e-04
Loss = 5.8785e-03, PNorm = 146.6781, GNorm = 0.1196, lr_0 = 3.5625e-04
Loss = 7.2003e-03, PNorm = 146.6929, GNorm = 0.1520, lr_0 = 3.5601e-04
Loss = 5.7292e-03, PNorm = 146.7048, GNorm = 0.3196, lr_0 = 3.5576e-04
Loss = 5.6750e-03, PNorm = 146.7190, GNorm = 0.5960, lr_0 = 3.5552e-04
Loss = 5.6361e-03, PNorm = 146.7286, GNorm = 0.1943, lr_0 = 3.5528e-04
Loss = 5.3879e-03, PNorm = 146.7405, GNorm = 0.1077, lr_0 = 3.5503e-04
Loss = 6.9476e-03, PNorm = 146.7556, GNorm = 0.3350, lr_0 = 3.5479e-04
Loss = 9.7557e-03, PNorm = 146.7708, GNorm = 0.2063, lr_0 = 3.5455e-04
Loss = 5.1234e-03, PNorm = 146.7853, GNorm = 0.3389, lr_0 = 3.5430e-04
Loss = 7.4961e-03, PNorm = 146.7980, GNorm = 0.1968, lr_0 = 3.5406e-04
Loss = 4.2481e-03, PNorm = 146.8131, GNorm = 0.1227, lr_0 = 3.5382e-04
Loss = 6.2524e-03, PNorm = 146.8243, GNorm = 0.3727, lr_0 = 3.5358e-04
Loss = 6.7163e-03, PNorm = 146.8400, GNorm = 0.2272, lr_0 = 3.5333e-04
Loss = 6.5461e-03, PNorm = 146.8543, GNorm = 0.1950, lr_0 = 3.5309e-04
Loss = 7.7569e-03, PNorm = 146.8725, GNorm = 0.0708, lr_0 = 3.5285e-04
Loss = 6.1357e-03, PNorm = 146.8907, GNorm = 0.1305, lr_0 = 3.5261e-04
Loss = 5.1621e-03, PNorm = 146.9057, GNorm = 0.2056, lr_0 = 3.5237e-04
Loss = 6.6588e-03, PNorm = 146.9233, GNorm = 0.4026, lr_0 = 3.5212e-04
Loss = 5.3699e-03, PNorm = 146.9352, GNorm = 0.2624, lr_0 = 3.5188e-04
Loss = 5.2201e-03, PNorm = 146.9492, GNorm = 0.1599, lr_0 = 3.5164e-04
Loss = 8.1132e-03, PNorm = 146.9612, GNorm = 0.2109, lr_0 = 3.5140e-04
Loss = 5.6353e-03, PNorm = 146.9763, GNorm = 0.1582, lr_0 = 3.5116e-04
Loss = 5.8639e-03, PNorm = 146.9893, GNorm = 0.1824, lr_0 = 3.5092e-04
Loss = 5.9584e-03, PNorm = 147.0013, GNorm = 0.2986, lr_0 = 3.5068e-04
Loss = 8.5503e-03, PNorm = 147.0136, GNorm = 0.1017, lr_0 = 3.5044e-04
Loss = 6.1709e-03, PNorm = 147.0263, GNorm = 0.3168, lr_0 = 3.5020e-04
Loss = 5.4096e-03, PNorm = 147.0439, GNorm = 0.4384, lr_0 = 3.4996e-04
Loss = 6.1072e-03, PNorm = 147.0586, GNorm = 0.3330, lr_0 = 3.4972e-04
Loss = 6.1136e-03, PNorm = 147.0732, GNorm = 0.3145, lr_0 = 3.4948e-04
Loss = 6.9347e-03, PNorm = 147.0903, GNorm = 0.1193, lr_0 = 3.4924e-04
Loss = 4.8880e-03, PNorm = 147.1038, GNorm = 0.2087, lr_0 = 3.4900e-04
Loss = 6.7124e-03, PNorm = 147.1178, GNorm = 0.3822, lr_0 = 3.4876e-04
Loss = 5.7155e-03, PNorm = 147.1312, GNorm = 0.1802, lr_0 = 3.4852e-04
Loss = 7.2248e-03, PNorm = 147.1506, GNorm = 0.1974, lr_0 = 3.4828e-04
Loss = 7.0888e-03, PNorm = 147.1632, GNorm = 0.2909, lr_0 = 3.4805e-04
Loss = 4.8035e-03, PNorm = 147.1739, GNorm = 0.2373, lr_0 = 3.4781e-04
Loss = 7.3582e-03, PNorm = 147.1863, GNorm = 0.1763, lr_0 = 3.4757e-04
Loss = 6.6514e-03, PNorm = 147.1979, GNorm = 0.2033, lr_0 = 3.4733e-04
Loss = 7.5011e-03, PNorm = 147.2125, GNorm = 0.1251, lr_0 = 3.4709e-04
Loss = 7.7707e-03, PNorm = 147.2303, GNorm = 0.4748, lr_0 = 3.4686e-04
Loss = 9.4335e-03, PNorm = 147.2466, GNorm = 0.6585, lr_0 = 3.4662e-04
Loss = 8.1504e-03, PNorm = 147.2545, GNorm = 0.1833, lr_0 = 3.4638e-04
Loss = 6.6813e-03, PNorm = 147.2689, GNorm = 0.1072, lr_0 = 3.4614e-04
Loss = 5.7228e-03, PNorm = 147.2869, GNorm = 0.1550, lr_0 = 3.4591e-04
Loss = 5.8455e-03, PNorm = 147.2975, GNorm = 0.1853, lr_0 = 3.4567e-04
Loss = 5.9886e-03, PNorm = 147.3101, GNorm = 0.1835, lr_0 = 3.4543e-04
Loss = 6.7627e-03, PNorm = 147.3230, GNorm = 0.3635, lr_0 = 3.4520e-04
Loss = 7.3847e-03, PNorm = 147.3406, GNorm = 0.3179, lr_0 = 3.4496e-04
Loss = 5.8616e-03, PNorm = 147.3580, GNorm = 0.0719, lr_0 = 3.4472e-04
Loss = 6.0399e-03, PNorm = 147.3744, GNorm = 0.1079, lr_0 = 3.4449e-04
Loss = 5.8574e-03, PNorm = 147.3926, GNorm = 0.2134, lr_0 = 3.4425e-04
Loss = 6.3146e-03, PNorm = 147.4095, GNorm = 0.1850, lr_0 = 3.4402e-04
Loss = 7.2448e-03, PNorm = 147.4223, GNorm = 0.0754, lr_0 = 3.4378e-04
Loss = 5.3866e-03, PNorm = 147.4351, GNorm = 0.1145, lr_0 = 3.4354e-04
Loss = 6.0334e-03, PNorm = 147.4463, GNorm = 0.0817, lr_0 = 3.4331e-04
Validation mae = 0.480489
Epoch 15
Loss = 4.6472e-03, PNorm = 147.4579, GNorm = 0.1113, lr_0 = 3.4307e-04
Loss = 6.1141e-03, PNorm = 147.4741, GNorm = 0.0820, lr_0 = 3.4284e-04
Loss = 5.8485e-03, PNorm = 147.4866, GNorm = 0.2240, lr_0 = 3.4260e-04
Loss = 5.0064e-03, PNorm = 147.4961, GNorm = 0.1353, lr_0 = 3.4237e-04
Loss = 6.1271e-03, PNorm = 147.5064, GNorm = 0.2942, lr_0 = 3.4213e-04
Loss = 4.5618e-03, PNorm = 147.5152, GNorm = 0.3247, lr_0 = 3.4190e-04
Loss = 4.8241e-03, PNorm = 147.5271, GNorm = 0.2139, lr_0 = 3.4167e-04
Loss = 5.2934e-03, PNorm = 147.5388, GNorm = 0.2338, lr_0 = 3.4143e-04
Loss = 6.8806e-03, PNorm = 147.5498, GNorm = 0.2456, lr_0 = 3.4120e-04
Loss = 5.8544e-03, PNorm = 147.5624, GNorm = 0.4222, lr_0 = 3.4096e-04
Loss = 5.3634e-03, PNorm = 147.5736, GNorm = 0.1738, lr_0 = 3.4073e-04
Loss = 4.5470e-03, PNorm = 147.5836, GNorm = 0.2710, lr_0 = 3.4050e-04
Loss = 5.7732e-03, PNorm = 147.5926, GNorm = 0.6296, lr_0 = 3.4026e-04
Loss = 6.1999e-03, PNorm = 147.6006, GNorm = 0.5898, lr_0 = 3.4003e-04
Loss = 5.3562e-03, PNorm = 147.6123, GNorm = 0.3210, lr_0 = 3.3980e-04
Loss = 7.4841e-03, PNorm = 147.6214, GNorm = 0.2129, lr_0 = 3.3956e-04
Loss = 4.7964e-03, PNorm = 147.6316, GNorm = 0.3251, lr_0 = 3.3933e-04
Loss = 4.8284e-03, PNorm = 147.6425, GNorm = 0.2230, lr_0 = 3.3910e-04
Loss = 5.6880e-03, PNorm = 147.6565, GNorm = 0.1667, lr_0 = 3.3887e-04
Loss = 5.2778e-03, PNorm = 147.6649, GNorm = 0.1401, lr_0 = 3.3864e-04
Loss = 4.9274e-03, PNorm = 147.6725, GNorm = 0.1429, lr_0 = 3.3840e-04
Loss = 5.3945e-03, PNorm = 147.6822, GNorm = 0.1002, lr_0 = 3.3817e-04
Loss = 4.2974e-03, PNorm = 147.6948, GNorm = 0.4845, lr_0 = 3.3794e-04
Loss = 8.6788e-03, PNorm = 147.7047, GNorm = 0.1335, lr_0 = 3.3771e-04
Loss = 4.6083e-03, PNorm = 147.7159, GNorm = 0.1372, lr_0 = 3.3748e-04
Loss = 4.8056e-03, PNorm = 147.7252, GNorm = 0.3000, lr_0 = 3.3725e-04
Loss = 5.4855e-03, PNorm = 147.7318, GNorm = 0.3781, lr_0 = 3.3701e-04
Loss = 6.1395e-03, PNorm = 147.7421, GNorm = 0.2013, lr_0 = 3.3678e-04
Loss = 4.2430e-03, PNorm = 147.7520, GNorm = 0.2573, lr_0 = 3.3655e-04
Loss = 4.4927e-03, PNorm = 147.7639, GNorm = 0.2980, lr_0 = 3.3632e-04
Loss = 4.2145e-03, PNorm = 147.7758, GNorm = 0.1235, lr_0 = 3.3609e-04
Loss = 5.0174e-03, PNorm = 147.7892, GNorm = 0.0811, lr_0 = 3.3586e-04
Loss = 3.7997e-03, PNorm = 147.7996, GNorm = 0.2204, lr_0 = 3.3563e-04
Loss = 5.6087e-03, PNorm = 147.8086, GNorm = 0.2397, lr_0 = 3.3540e-04
Loss = 5.0270e-03, PNorm = 147.8222, GNorm = 0.2000, lr_0 = 3.3517e-04
Loss = 5.1554e-03, PNorm = 147.8317, GNorm = 0.1195, lr_0 = 3.3494e-04
Loss = 5.8752e-03, PNorm = 147.8398, GNorm = 0.3764, lr_0 = 3.3471e-04
Loss = 5.6658e-03, PNorm = 147.8551, GNorm = 0.4008, lr_0 = 3.3448e-04
Loss = 4.6820e-03, PNorm = 147.8675, GNorm = 0.1281, lr_0 = 3.3425e-04
Loss = 5.1296e-03, PNorm = 147.8788, GNorm = 0.2126, lr_0 = 3.3403e-04
Loss = 5.6177e-03, PNorm = 147.8924, GNorm = 0.3605, lr_0 = 3.3380e-04
Loss = 7.0697e-03, PNorm = 147.9027, GNorm = 0.3428, lr_0 = 3.3357e-04
Loss = 6.4197e-03, PNorm = 147.9147, GNorm = 0.1735, lr_0 = 3.3334e-04
Loss = 5.1677e-03, PNorm = 147.9268, GNorm = 0.6123, lr_0 = 3.3311e-04
Loss = 4.5064e-03, PNorm = 147.9379, GNorm = 0.3375, lr_0 = 3.3288e-04
Loss = 5.2306e-03, PNorm = 147.9475, GNorm = 0.2293, lr_0 = 3.3265e-04
Loss = 7.0940e-03, PNorm = 147.9575, GNorm = 0.2428, lr_0 = 3.3243e-04
Loss = 6.1027e-03, PNorm = 147.9680, GNorm = 0.2912, lr_0 = 3.3220e-04
Loss = 4.8576e-03, PNorm = 147.9810, GNorm = 0.0981, lr_0 = 3.3197e-04
Loss = 4.5781e-03, PNorm = 147.9929, GNorm = 0.1929, lr_0 = 3.3174e-04
Loss = 8.0154e-03, PNorm = 148.0025, GNorm = 0.2287, lr_0 = 3.3152e-04
Loss = 4.7141e-03, PNorm = 148.0152, GNorm = 0.0997, lr_0 = 3.3129e-04
Loss = 4.3812e-03, PNorm = 148.0271, GNorm = 0.0987, lr_0 = 3.3106e-04
Loss = 5.0420e-03, PNorm = 148.0358, GNorm = 0.4836, lr_0 = 3.3084e-04
Loss = 5.4693e-03, PNorm = 148.0487, GNorm = 0.6034, lr_0 = 3.3061e-04
Loss = 5.1235e-03, PNorm = 148.0606, GNorm = 0.3480, lr_0 = 3.3038e-04
Loss = 5.6267e-03, PNorm = 148.0716, GNorm = 0.2395, lr_0 = 3.3016e-04
Loss = 5.4248e-03, PNorm = 148.0847, GNorm = 0.2474, lr_0 = 3.2993e-04
Loss = 4.7522e-03, PNorm = 148.0941, GNorm = 0.4480, lr_0 = 3.2970e-04
Loss = 4.7193e-03, PNorm = 148.1031, GNorm = 0.2713, lr_0 = 3.2948e-04
Loss = 4.0439e-03, PNorm = 148.1135, GNorm = 0.3005, lr_0 = 3.2925e-04
Loss = 5.1915e-03, PNorm = 148.1263, GNorm = 0.2915, lr_0 = 3.2903e-04
Loss = 6.9573e-03, PNorm = 148.1384, GNorm = 0.2639, lr_0 = 3.2880e-04
Loss = 5.6958e-03, PNorm = 148.1501, GNorm = 0.1954, lr_0 = 3.2858e-04
Loss = 4.9794e-03, PNorm = 148.1628, GNorm = 0.4369, lr_0 = 3.2835e-04
Loss = 4.6890e-03, PNorm = 148.1743, GNorm = 0.1186, lr_0 = 3.2813e-04
Loss = 5.7286e-03, PNorm = 148.1863, GNorm = 0.1298, lr_0 = 3.2790e-04
Loss = 5.5218e-03, PNorm = 148.2014, GNorm = 0.1603, lr_0 = 3.2768e-04
Loss = 5.9283e-03, PNorm = 148.2146, GNorm = 0.3588, lr_0 = 3.2745e-04
Loss = 5.2311e-03, PNorm = 148.2293, GNorm = 0.1335, lr_0 = 3.2723e-04
Loss = 6.3843e-03, PNorm = 148.2397, GNorm = 0.1026, lr_0 = 3.2700e-04
Loss = 4.0617e-03, PNorm = 148.2493, GNorm = 0.1108, lr_0 = 3.2678e-04
Loss = 4.3626e-03, PNorm = 148.2612, GNorm = 0.2553, lr_0 = 3.2656e-04
Loss = 5.1302e-03, PNorm = 148.2746, GNorm = 0.2302, lr_0 = 3.2633e-04
Loss = 4.8967e-03, PNorm = 148.2869, GNorm = 0.3478, lr_0 = 3.2611e-04
Loss = 7.0178e-03, PNorm = 148.2976, GNorm = 0.3516, lr_0 = 3.2589e-04
Loss = 4.4791e-03, PNorm = 148.3086, GNorm = 0.2391, lr_0 = 3.2566e-04
Loss = 4.6453e-03, PNorm = 148.3170, GNorm = 0.3747, lr_0 = 3.2544e-04
Loss = 5.4682e-03, PNorm = 148.3278, GNorm = 0.1714, lr_0 = 3.2522e-04
Loss = 8.6397e-03, PNorm = 148.3423, GNorm = 0.1043, lr_0 = 3.2499e-04
Loss = 4.9178e-03, PNorm = 148.3518, GNorm = 0.1699, lr_0 = 3.2477e-04
Loss = 4.3405e-03, PNorm = 148.3608, GNorm = 0.2113, lr_0 = 3.2455e-04
Loss = 4.9038e-03, PNorm = 148.3724, GNorm = 0.2211, lr_0 = 3.2433e-04
Loss = 4.4217e-03, PNorm = 148.3849, GNorm = 0.1883, lr_0 = 3.2410e-04
Loss = 4.6792e-03, PNorm = 148.3959, GNorm = 0.2867, lr_0 = 3.2388e-04
Loss = 4.8779e-03, PNorm = 148.4077, GNorm = 0.2026, lr_0 = 3.2366e-04
Loss = 5.1259e-03, PNorm = 148.4143, GNorm = 0.1992, lr_0 = 3.2344e-04
Loss = 6.6140e-03, PNorm = 148.4225, GNorm = 0.1579, lr_0 = 3.2322e-04
Loss = 4.6147e-03, PNorm = 148.4324, GNorm = 0.0977, lr_0 = 3.2300e-04
Loss = 6.1916e-03, PNorm = 148.4428, GNorm = 0.1401, lr_0 = 3.2277e-04
Loss = 4.4379e-03, PNorm = 148.4548, GNorm = 0.4090, lr_0 = 3.2255e-04
Loss = 5.0684e-03, PNorm = 148.4668, GNorm = 0.1360, lr_0 = 3.2233e-04
Loss = 6.6856e-03, PNorm = 148.4768, GNorm = 0.3795, lr_0 = 3.2211e-04
Loss = 5.9717e-03, PNorm = 148.4884, GNorm = 0.1780, lr_0 = 3.2189e-04
Loss = 6.6824e-03, PNorm = 148.5013, GNorm = 0.1376, lr_0 = 3.2167e-04
Loss = 5.4506e-03, PNorm = 148.5191, GNorm = 0.2016, lr_0 = 3.2145e-04
Loss = 4.0065e-03, PNorm = 148.5350, GNorm = 0.2601, lr_0 = 3.2123e-04
Loss = 4.6023e-03, PNorm = 148.5469, GNorm = 0.1800, lr_0 = 3.2101e-04
Loss = 4.3360e-03, PNorm = 148.5535, GNorm = 0.0821, lr_0 = 3.2079e-04
Loss = 4.8847e-03, PNorm = 148.5616, GNorm = 0.1025, lr_0 = 3.2057e-04
Loss = 5.7392e-03, PNorm = 148.5749, GNorm = 0.3305, lr_0 = 3.2035e-04
Loss = 6.0128e-03, PNorm = 148.5879, GNorm = 0.0748, lr_0 = 3.2013e-04
Loss = 6.8115e-03, PNorm = 148.6002, GNorm = 0.0916, lr_0 = 3.1991e-04
Loss = 5.1185e-03, PNorm = 148.6140, GNorm = 0.2593, lr_0 = 3.1969e-04
Loss = 5.0338e-03, PNorm = 148.6250, GNorm = 0.1110, lr_0 = 3.1947e-04
Loss = 5.3053e-03, PNorm = 148.6371, GNorm = 0.1879, lr_0 = 3.1925e-04
Loss = 4.4812e-03, PNorm = 148.6517, GNorm = 0.2568, lr_0 = 3.1904e-04
Loss = 6.4851e-03, PNorm = 148.6636, GNorm = 0.1432, lr_0 = 3.1882e-04
Loss = 4.0408e-03, PNorm = 148.6765, GNorm = 0.1005, lr_0 = 3.1860e-04
Loss = 4.2998e-03, PNorm = 148.6911, GNorm = 0.2065, lr_0 = 3.1838e-04
Loss = 6.2022e-03, PNorm = 148.6995, GNorm = 0.3388, lr_0 = 3.1816e-04
Loss = 5.5045e-03, PNorm = 148.7120, GNorm = 0.3284, lr_0 = 3.1794e-04
Loss = 5.3352e-03, PNorm = 148.7246, GNorm = 0.2427, lr_0 = 3.1773e-04
Loss = 6.7201e-03, PNorm = 148.7369, GNorm = 0.1413, lr_0 = 3.1751e-04
Loss = 5.7133e-03, PNorm = 148.7521, GNorm = 0.5479, lr_0 = 3.1729e-04
Loss = 7.3988e-03, PNorm = 148.7670, GNorm = 0.2462, lr_0 = 3.1707e-04
Loss = 5.0249e-03, PNorm = 148.7783, GNorm = 0.1420, lr_0 = 3.1686e-04
Loss = 4.5259e-03, PNorm = 148.7879, GNorm = 0.2741, lr_0 = 3.1664e-04
Loss = 4.5505e-03, PNorm = 148.7980, GNorm = 0.0682, lr_0 = 3.1642e-04
Loss = 4.8999e-03, PNorm = 148.8076, GNorm = 0.1764, lr_0 = 3.1621e-04
Validation mae = 0.478692
Epoch 16
Loss = 4.6481e-03, PNorm = 148.8171, GNorm = 0.1317, lr_0 = 3.1599e-04
Loss = 4.3575e-03, PNorm = 148.8279, GNorm = 0.3460, lr_0 = 3.1577e-04
Loss = 4.3683e-03, PNorm = 148.8344, GNorm = 0.2460, lr_0 = 3.1556e-04
Loss = 4.3978e-03, PNorm = 148.8422, GNorm = 0.5888, lr_0 = 3.1534e-04
Loss = 4.4622e-03, PNorm = 148.8478, GNorm = 0.1646, lr_0 = 3.1512e-04
Loss = 5.0605e-03, PNorm = 148.8555, GNorm = 0.1807, lr_0 = 3.1491e-04
Loss = 4.3748e-03, PNorm = 148.8667, GNorm = 0.2648, lr_0 = 3.1469e-04
Loss = 6.0583e-03, PNorm = 148.8765, GNorm = 0.1686, lr_0 = 3.1448e-04
Loss = 3.9479e-03, PNorm = 148.8850, GNorm = 0.0998, lr_0 = 3.1426e-04
Loss = 4.4022e-03, PNorm = 148.8913, GNorm = 0.1505, lr_0 = 3.1405e-04
Loss = 3.7299e-03, PNorm = 148.9006, GNorm = 0.1888, lr_0 = 3.1383e-04
Loss = 5.0446e-03, PNorm = 148.9062, GNorm = 0.1103, lr_0 = 3.1362e-04
Loss = 4.2952e-03, PNorm = 148.9131, GNorm = 0.0754, lr_0 = 3.1340e-04
Loss = 4.6077e-03, PNorm = 148.9233, GNorm = 0.2828, lr_0 = 3.1319e-04
Loss = 4.5172e-03, PNorm = 148.9335, GNorm = 0.1127, lr_0 = 3.1297e-04
Loss = 4.4129e-03, PNorm = 148.9403, GNorm = 0.0832, lr_0 = 3.1276e-04
Loss = 4.0094e-03, PNorm = 148.9454, GNorm = 0.2130, lr_0 = 3.1254e-04
Loss = 4.2735e-03, PNorm = 148.9524, GNorm = 0.2627, lr_0 = 3.1233e-04
Loss = 3.8397e-03, PNorm = 148.9613, GNorm = 0.3002, lr_0 = 3.1212e-04
Loss = 5.2307e-03, PNorm = 148.9711, GNorm = 0.0621, lr_0 = 3.1190e-04
Loss = 3.9246e-03, PNorm = 148.9797, GNorm = 0.1694, lr_0 = 3.1169e-04
Loss = 3.9506e-03, PNorm = 148.9910, GNorm = 0.2935, lr_0 = 3.1147e-04
Loss = 4.3764e-03, PNorm = 149.0015, GNorm = 0.2872, lr_0 = 3.1126e-04
Loss = 3.6443e-03, PNorm = 149.0114, GNorm = 0.0508, lr_0 = 3.1105e-04
Loss = 4.1826e-03, PNorm = 149.0214, GNorm = 0.0629, lr_0 = 3.1083e-04
Loss = 4.1674e-03, PNorm = 149.0290, GNorm = 0.2956, lr_0 = 3.1062e-04
Loss = 4.0711e-03, PNorm = 149.0375, GNorm = 0.2630, lr_0 = 3.1041e-04
Loss = 3.9388e-03, PNorm = 149.0445, GNorm = 0.1365, lr_0 = 3.1020e-04
Loss = 4.3710e-03, PNorm = 149.0497, GNorm = 0.3807, lr_0 = 3.0998e-04
Loss = 3.9181e-03, PNorm = 149.0567, GNorm = 0.1547, lr_0 = 3.0977e-04
Loss = 4.3965e-03, PNorm = 149.0666, GNorm = 0.0896, lr_0 = 3.0956e-04
Loss = 4.1107e-03, PNorm = 149.0770, GNorm = 0.1672, lr_0 = 3.0935e-04
Loss = 3.7939e-03, PNorm = 149.0831, GNorm = 0.3239, lr_0 = 3.0914e-04
Loss = 4.2611e-03, PNorm = 149.0939, GNorm = 0.2541, lr_0 = 3.0892e-04
Loss = 3.9354e-03, PNorm = 149.1014, GNorm = 0.3088, lr_0 = 3.0871e-04
Loss = 4.0030e-03, PNorm = 149.1110, GNorm = 0.2063, lr_0 = 3.0850e-04
Loss = 4.0188e-03, PNorm = 149.1231, GNorm = 0.1286, lr_0 = 3.0829e-04
Loss = 3.9784e-03, PNorm = 149.1281, GNorm = 0.1003, lr_0 = 3.0808e-04
Loss = 4.0119e-03, PNorm = 149.1359, GNorm = 0.2342, lr_0 = 3.0787e-04
Loss = 4.0919e-03, PNorm = 149.1452, GNorm = 0.0866, lr_0 = 3.0766e-04
Loss = 4.0181e-03, PNorm = 149.1544, GNorm = 0.1484, lr_0 = 3.0745e-04
Loss = 5.4900e-03, PNorm = 149.1614, GNorm = 0.1317, lr_0 = 3.0723e-04
Loss = 4.6512e-03, PNorm = 149.1684, GNorm = 0.2171, lr_0 = 3.0702e-04
Loss = 4.8502e-03, PNorm = 149.1780, GNorm = 0.3626, lr_0 = 3.0681e-04
Loss = 3.3952e-03, PNorm = 149.1896, GNorm = 0.2330, lr_0 = 3.0660e-04
Loss = 3.7956e-03, PNorm = 149.2001, GNorm = 0.1519, lr_0 = 3.0639e-04
Loss = 5.9511e-03, PNorm = 149.2088, GNorm = 0.2139, lr_0 = 3.0618e-04
Loss = 4.7504e-03, PNorm = 149.2235, GNorm = 0.1532, lr_0 = 3.0597e-04
Loss = 3.8464e-03, PNorm = 149.2332, GNorm = 0.2195, lr_0 = 3.0576e-04
Loss = 3.9686e-03, PNorm = 149.2401, GNorm = 0.0542, lr_0 = 3.0555e-04
Loss = 3.9997e-03, PNorm = 149.2476, GNorm = 0.1001, lr_0 = 3.0535e-04
Loss = 5.1452e-03, PNorm = 149.2560, GNorm = 0.2452, lr_0 = 3.0514e-04
Loss = 3.9086e-03, PNorm = 149.2645, GNorm = 0.2472, lr_0 = 3.0493e-04
Loss = 3.3997e-03, PNorm = 149.2756, GNorm = 0.1907, lr_0 = 3.0472e-04
Loss = 4.8669e-03, PNorm = 149.2870, GNorm = 0.2904, lr_0 = 3.0451e-04
Loss = 4.4081e-03, PNorm = 149.2971, GNorm = 0.4150, lr_0 = 3.0430e-04
Loss = 8.4705e-03, PNorm = 149.3090, GNorm = 0.0787, lr_0 = 3.0409e-04
Loss = 3.7586e-03, PNorm = 149.3182, GNorm = 0.3393, lr_0 = 3.0388e-04
Loss = 4.8447e-03, PNorm = 149.3286, GNorm = 0.3030, lr_0 = 3.0368e-04
Loss = 7.3919e-03, PNorm = 149.3410, GNorm = 0.2502, lr_0 = 3.0347e-04
Loss = 6.2782e-03, PNorm = 149.3508, GNorm = 0.4630, lr_0 = 3.0326e-04
Loss = 6.4041e-03, PNorm = 149.3590, GNorm = 0.4244, lr_0 = 3.0305e-04
Loss = 5.7502e-03, PNorm = 149.3690, GNorm = 0.1782, lr_0 = 3.0284e-04
Loss = 3.1178e-03, PNorm = 149.3789, GNorm = 0.2813, lr_0 = 3.0264e-04
Loss = 5.1650e-03, PNorm = 149.3866, GNorm = 0.1403, lr_0 = 3.0243e-04
Loss = 6.9174e-03, PNorm = 149.3947, GNorm = 0.1992, lr_0 = 3.0222e-04
Loss = 5.8045e-03, PNorm = 149.4056, GNorm = 0.1150, lr_0 = 3.0202e-04
Loss = 4.5311e-03, PNorm = 149.4205, GNorm = 0.2290, lr_0 = 3.0181e-04
Loss = 3.8042e-03, PNorm = 149.4338, GNorm = 0.1093, lr_0 = 3.0160e-04
Loss = 6.9143e-03, PNorm = 149.4454, GNorm = 0.2132, lr_0 = 3.0140e-04
Loss = 5.3469e-03, PNorm = 149.4565, GNorm = 0.1431, lr_0 = 3.0119e-04
Loss = 3.8464e-03, PNorm = 149.4647, GNorm = 0.1459, lr_0 = 3.0098e-04
Loss = 4.4873e-03, PNorm = 149.4742, GNorm = 0.1796, lr_0 = 3.0078e-04
Loss = 5.9314e-03, PNorm = 149.4819, GNorm = 0.3366, lr_0 = 3.0057e-04
Loss = 3.5818e-03, PNorm = 149.4917, GNorm = 0.1050, lr_0 = 3.0036e-04
Loss = 3.7709e-03, PNorm = 149.4994, GNorm = 0.2723, lr_0 = 3.0016e-04
Loss = 4.3084e-03, PNorm = 149.5119, GNorm = 0.2520, lr_0 = 2.9995e-04
Loss = 4.8583e-03, PNorm = 149.5242, GNorm = 0.1613, lr_0 = 2.9975e-04
Loss = 4.4282e-03, PNorm = 149.5354, GNorm = 0.2686, lr_0 = 2.9954e-04
Loss = 6.5112e-03, PNorm = 149.5407, GNorm = 0.2429, lr_0 = 2.9934e-04
Loss = 4.0006e-03, PNorm = 149.5455, GNorm = 0.1895, lr_0 = 2.9913e-04
Loss = 3.9463e-03, PNorm = 149.5524, GNorm = 0.0683, lr_0 = 2.9893e-04
Loss = 7.4148e-03, PNorm = 149.5662, GNorm = 0.1977, lr_0 = 2.9872e-04
Loss = 4.5461e-03, PNorm = 149.5750, GNorm = 0.3401, lr_0 = 2.9852e-04
Loss = 4.2045e-03, PNorm = 149.5787, GNorm = 0.1887, lr_0 = 2.9831e-04
Loss = 4.2321e-03, PNorm = 149.5885, GNorm = 0.1627, lr_0 = 2.9811e-04
Loss = 5.0119e-03, PNorm = 149.6022, GNorm = 0.2049, lr_0 = 2.9790e-04
Loss = 4.1055e-03, PNorm = 149.6127, GNorm = 0.3948, lr_0 = 2.9770e-04
Loss = 4.1053e-03, PNorm = 149.6228, GNorm = 0.1903, lr_0 = 2.9750e-04
Loss = 4.1794e-03, PNorm = 149.6320, GNorm = 0.2845, lr_0 = 2.9729e-04
Loss = 5.5881e-03, PNorm = 149.6408, GNorm = 0.1455, lr_0 = 2.9709e-04
Loss = 4.0114e-03, PNorm = 149.6520, GNorm = 0.2036, lr_0 = 2.9689e-04
Loss = 4.5438e-03, PNorm = 149.6618, GNorm = 0.6229, lr_0 = 2.9668e-04
Loss = 4.1022e-03, PNorm = 149.6706, GNorm = 0.0686, lr_0 = 2.9648e-04
Loss = 5.5442e-03, PNorm = 149.6793, GNorm = 0.1488, lr_0 = 2.9628e-04
Loss = 6.3054e-03, PNorm = 149.6914, GNorm = 0.3006, lr_0 = 2.9607e-04
Loss = 5.6514e-03, PNorm = 149.7000, GNorm = 0.3183, lr_0 = 2.9587e-04
Loss = 4.0136e-03, PNorm = 149.7127, GNorm = 0.2811, lr_0 = 2.9567e-04
Loss = 4.1308e-03, PNorm = 149.7236, GNorm = 0.1842, lr_0 = 2.9546e-04
Loss = 4.8388e-03, PNorm = 149.7351, GNorm = 0.2418, lr_0 = 2.9526e-04
Loss = 5.0805e-03, PNorm = 149.7479, GNorm = 0.2752, lr_0 = 2.9506e-04
Loss = 3.8108e-03, PNorm = 149.7585, GNorm = 0.1305, lr_0 = 2.9486e-04
Loss = 4.1030e-03, PNorm = 149.7642, GNorm = 0.1056, lr_0 = 2.9466e-04
Loss = 5.1024e-03, PNorm = 149.7757, GNorm = 0.3502, lr_0 = 2.9445e-04
Loss = 3.6898e-03, PNorm = 149.7854, GNorm = 0.0924, lr_0 = 2.9425e-04
Loss = 4.1368e-03, PNorm = 149.7967, GNorm = 0.0963, lr_0 = 2.9405e-04
Loss = 5.7888e-03, PNorm = 149.8083, GNorm = 0.0778, lr_0 = 2.9385e-04
Loss = 4.5115e-03, PNorm = 149.8197, GNorm = 0.2721, lr_0 = 2.9365e-04
Loss = 4.8810e-03, PNorm = 149.8280, GNorm = 0.1595, lr_0 = 2.9345e-04
Loss = 6.5399e-03, PNorm = 149.8347, GNorm = 0.7027, lr_0 = 2.9325e-04
Loss = 5.3702e-03, PNorm = 149.8416, GNorm = 0.2385, lr_0 = 2.9305e-04
Loss = 3.8058e-03, PNorm = 149.8524, GNorm = 0.0621, lr_0 = 2.9284e-04
Loss = 3.9452e-03, PNorm = 149.8620, GNorm = 0.3111, lr_0 = 2.9264e-04
Loss = 4.4570e-03, PNorm = 149.8693, GNorm = 0.1004, lr_0 = 2.9244e-04
Loss = 6.3191e-03, PNorm = 149.8798, GNorm = 0.3137, lr_0 = 2.9224e-04
Loss = 3.7465e-03, PNorm = 149.8905, GNorm = 0.1190, lr_0 = 2.9204e-04
Loss = 4.7945e-03, PNorm = 149.9001, GNorm = 0.1059, lr_0 = 2.9184e-04
Loss = 3.7797e-03, PNorm = 149.9113, GNorm = 0.2496, lr_0 = 2.9164e-04
Loss = 3.8471e-03, PNorm = 149.9234, GNorm = 0.3670, lr_0 = 2.9144e-04
Loss = 4.1049e-03, PNorm = 149.9341, GNorm = 0.6385, lr_0 = 2.9124e-04
Validation mae = 0.477565
Epoch 17
Loss = 3.1311e-03, PNorm = 149.9415, GNorm = 0.1616, lr_0 = 2.9104e-04
Loss = 3.4744e-03, PNorm = 149.9442, GNorm = 0.1913, lr_0 = 2.9084e-04
Loss = 4.4477e-03, PNorm = 149.9520, GNorm = 0.1120, lr_0 = 2.9065e-04
Loss = 3.4053e-03, PNorm = 149.9592, GNorm = 0.1114, lr_0 = 2.9045e-04
Loss = 3.9652e-03, PNorm = 149.9638, GNorm = 0.2589, lr_0 = 2.9025e-04
Loss = 4.5810e-03, PNorm = 149.9740, GNorm = 0.1722, lr_0 = 2.9005e-04
Loss = 7.0907e-03, PNorm = 149.9806, GNorm = 0.1625, lr_0 = 2.8985e-04
Loss = 4.4322e-03, PNorm = 149.9842, GNorm = 0.2150, lr_0 = 2.8965e-04
Loss = 3.4287e-03, PNorm = 149.9913, GNorm = 0.2666, lr_0 = 2.8945e-04
Loss = 4.0352e-03, PNorm = 150.0003, GNorm = 0.1335, lr_0 = 2.8925e-04
Loss = 5.0285e-03, PNorm = 150.0108, GNorm = 0.1811, lr_0 = 2.8906e-04
Loss = 3.5871e-03, PNorm = 150.0187, GNorm = 0.2533, lr_0 = 2.8886e-04
Loss = 4.1757e-03, PNorm = 150.0219, GNorm = 0.4904, lr_0 = 2.8866e-04
Loss = 3.3763e-03, PNorm = 150.0316, GNorm = 0.0937, lr_0 = 2.8846e-04
Loss = 4.0694e-03, PNorm = 150.0399, GNorm = 0.1377, lr_0 = 2.8826e-04
Loss = 3.0858e-03, PNorm = 150.0497, GNorm = 0.1123, lr_0 = 2.8807e-04
Loss = 4.3148e-03, PNorm = 150.0605, GNorm = 0.2068, lr_0 = 2.8787e-04
Loss = 3.5887e-03, PNorm = 150.0696, GNorm = 0.2311, lr_0 = 2.8767e-04
Loss = 5.5146e-03, PNorm = 150.0803, GNorm = 0.1821, lr_0 = 2.8748e-04
Loss = 3.7686e-03, PNorm = 150.0895, GNorm = 0.2469, lr_0 = 2.8728e-04
Loss = 4.4046e-03, PNorm = 150.0998, GNorm = 0.0975, lr_0 = 2.8708e-04
Loss = 3.3251e-03, PNorm = 150.1087, GNorm = 0.1228, lr_0 = 2.8689e-04
Loss = 3.6170e-03, PNorm = 150.1168, GNorm = 0.0889, lr_0 = 2.8669e-04
Loss = 4.2557e-03, PNorm = 150.1232, GNorm = 0.3182, lr_0 = 2.8649e-04
Loss = 3.7573e-03, PNorm = 150.1306, GNorm = 0.0740, lr_0 = 2.8630e-04
Loss = 3.8530e-03, PNorm = 150.1383, GNorm = 0.1410, lr_0 = 2.8610e-04
Loss = 3.0764e-03, PNorm = 150.1439, GNorm = 0.0994, lr_0 = 2.8590e-04
Loss = 4.9018e-03, PNorm = 150.1525, GNorm = 0.2793, lr_0 = 2.8571e-04
Loss = 4.1401e-03, PNorm = 150.1590, GNorm = 0.0987, lr_0 = 2.8551e-04
Loss = 4.6993e-03, PNorm = 150.1656, GNorm = 0.0706, lr_0 = 2.8532e-04
Loss = 3.0633e-03, PNorm = 150.1735, GNorm = 0.1135, lr_0 = 2.8512e-04
Loss = 3.5668e-03, PNorm = 150.1787, GNorm = 0.3216, lr_0 = 2.8493e-04
Loss = 3.4290e-03, PNorm = 150.1854, GNorm = 0.3552, lr_0 = 2.8473e-04
Loss = 3.3465e-03, PNorm = 150.1952, GNorm = 0.1954, lr_0 = 2.8454e-04
Loss = 3.5343e-03, PNorm = 150.2011, GNorm = 0.5796, lr_0 = 2.8434e-04
Loss = 4.6625e-03, PNorm = 150.2092, GNorm = 0.2006, lr_0 = 2.8415e-04
Loss = 3.6967e-03, PNorm = 150.2199, GNorm = 0.1543, lr_0 = 2.8395e-04
Loss = 3.7354e-03, PNorm = 150.2288, GNorm = 0.2363, lr_0 = 2.8376e-04
Loss = 3.4992e-03, PNorm = 150.2385, GNorm = 0.2188, lr_0 = 2.8356e-04
Loss = 3.1869e-03, PNorm = 150.2447, GNorm = 0.1794, lr_0 = 2.8337e-04
Loss = 3.9650e-03, PNorm = 150.2526, GNorm = 0.1074, lr_0 = 2.8317e-04
Loss = 3.4171e-03, PNorm = 150.2602, GNorm = 0.1858, lr_0 = 2.8298e-04
Loss = 3.1669e-03, PNorm = 150.2702, GNorm = 0.1071, lr_0 = 2.8279e-04
Loss = 3.2103e-03, PNorm = 150.2787, GNorm = 0.2496, lr_0 = 2.8259e-04
Loss = 4.2022e-03, PNorm = 150.2834, GNorm = 0.1787, lr_0 = 2.8240e-04
Loss = 3.9692e-03, PNorm = 150.2894, GNorm = 0.2788, lr_0 = 2.8221e-04
Loss = 3.3878e-03, PNorm = 150.2977, GNorm = 0.1843, lr_0 = 2.8201e-04
Loss = 5.2907e-03, PNorm = 150.3065, GNorm = 0.2455, lr_0 = 2.8182e-04
Loss = 5.4739e-03, PNorm = 150.3129, GNorm = 0.4836, lr_0 = 2.8163e-04
Loss = 4.5043e-03, PNorm = 150.3204, GNorm = 0.2372, lr_0 = 2.8143e-04
Loss = 2.9274e-03, PNorm = 150.3279, GNorm = 0.2053, lr_0 = 2.8124e-04
Loss = 3.4736e-03, PNorm = 150.3344, GNorm = 0.0830, lr_0 = 2.8105e-04
Loss = 3.6458e-03, PNorm = 150.3439, GNorm = 0.2297, lr_0 = 2.8085e-04
Loss = 4.4236e-03, PNorm = 150.3532, GNorm = 0.1882, lr_0 = 2.8066e-04
Loss = 7.4099e-03, PNorm = 150.3614, GNorm = 0.2492, lr_0 = 2.8047e-04
Loss = 3.4584e-03, PNorm = 150.3708, GNorm = 0.2342, lr_0 = 2.8028e-04
Loss = 4.0246e-03, PNorm = 150.3761, GNorm = 0.2496, lr_0 = 2.8009e-04
Loss = 5.3019e-03, PNorm = 150.3864, GNorm = 0.8310, lr_0 = 2.7989e-04
Loss = 6.7626e-03, PNorm = 150.3935, GNorm = 0.1101, lr_0 = 2.7970e-04
Loss = 5.0470e-03, PNorm = 150.4036, GNorm = 0.2059, lr_0 = 2.7951e-04
Loss = 3.1796e-03, PNorm = 150.4115, GNorm = 0.0815, lr_0 = 2.7932e-04
Loss = 3.9723e-03, PNorm = 150.4196, GNorm = 0.4781, lr_0 = 2.7913e-04
Loss = 3.8909e-03, PNorm = 150.4269, GNorm = 0.0591, lr_0 = 2.7894e-04
Loss = 3.0710e-03, PNorm = 150.4363, GNorm = 0.1643, lr_0 = 2.7875e-04
Loss = 4.0859e-03, PNorm = 150.4467, GNorm = 0.2011, lr_0 = 2.7855e-04
Loss = 3.9255e-03, PNorm = 150.4555, GNorm = 0.2101, lr_0 = 2.7836e-04
Loss = 3.9344e-03, PNorm = 150.4620, GNorm = 0.2708, lr_0 = 2.7817e-04
Loss = 4.5673e-03, PNorm = 150.4697, GNorm = 0.1323, lr_0 = 2.7798e-04
Loss = 3.1472e-03, PNorm = 150.4798, GNorm = 0.1187, lr_0 = 2.7779e-04
Loss = 3.2596e-03, PNorm = 150.4889, GNorm = 0.0777, lr_0 = 2.7760e-04
Loss = 3.2831e-03, PNorm = 150.4974, GNorm = 0.0903, lr_0 = 2.7741e-04
Loss = 3.9098e-03, PNorm = 150.5037, GNorm = 0.1154, lr_0 = 2.7722e-04
Loss = 3.9468e-03, PNorm = 150.5124, GNorm = 0.1297, lr_0 = 2.7703e-04
Loss = 5.0085e-03, PNorm = 150.5190, GNorm = 0.4257, lr_0 = 2.7684e-04
Loss = 3.3828e-03, PNorm = 150.5285, GNorm = 0.1066, lr_0 = 2.7665e-04
Loss = 4.0805e-03, PNorm = 150.5370, GNorm = 0.2293, lr_0 = 2.7646e-04
Loss = 7.4674e-03, PNorm = 150.5464, GNorm = 0.4200, lr_0 = 2.7627e-04
Loss = 4.5394e-03, PNorm = 150.5541, GNorm = 0.1712, lr_0 = 2.7608e-04
Loss = 3.3489e-03, PNorm = 150.5627, GNorm = 0.1567, lr_0 = 2.7590e-04
Loss = 2.9665e-03, PNorm = 150.5745, GNorm = 0.1595, lr_0 = 2.7571e-04
Loss = 3.6311e-03, PNorm = 150.5894, GNorm = 0.2224, lr_0 = 2.7552e-04
Loss = 3.5153e-03, PNorm = 150.6003, GNorm = 0.1390, lr_0 = 2.7533e-04
Loss = 3.7642e-03, PNorm = 150.6088, GNorm = 0.1320, lr_0 = 2.7514e-04
Loss = 4.0035e-03, PNorm = 150.6153, GNorm = 0.0527, lr_0 = 2.7495e-04
Loss = 3.9253e-03, PNorm = 150.6213, GNorm = 0.2127, lr_0 = 2.7476e-04
Loss = 3.2109e-03, PNorm = 150.6321, GNorm = 0.1472, lr_0 = 2.7457e-04
Loss = 3.8965e-03, PNorm = 150.6400, GNorm = 0.3936, lr_0 = 2.7439e-04
Loss = 3.8570e-03, PNorm = 150.6463, GNorm = 0.0604, lr_0 = 2.7420e-04
Loss = 3.4343e-03, PNorm = 150.6536, GNorm = 0.1454, lr_0 = 2.7401e-04
Loss = 4.6293e-03, PNorm = 150.6603, GNorm = 0.4350, lr_0 = 2.7382e-04
Loss = 4.2159e-03, PNorm = 150.6688, GNorm = 0.0912, lr_0 = 2.7364e-04
Loss = 3.2365e-03, PNorm = 150.6782, GNorm = 0.1580, lr_0 = 2.7345e-04
Loss = 4.5850e-03, PNorm = 150.6889, GNorm = 0.2086, lr_0 = 2.7326e-04
Loss = 3.2662e-03, PNorm = 150.6995, GNorm = 0.1137, lr_0 = 2.7307e-04
Loss = 4.8625e-03, PNorm = 150.7107, GNorm = 0.3129, lr_0 = 2.7289e-04
Loss = 4.0176e-03, PNorm = 150.7185, GNorm = 0.1171, lr_0 = 2.7270e-04
Loss = 3.5240e-03, PNorm = 150.7253, GNorm = 0.1292, lr_0 = 2.7251e-04
Loss = 3.3007e-03, PNorm = 150.7302, GNorm = 0.2566, lr_0 = 2.7233e-04
Loss = 3.6872e-03, PNorm = 150.7368, GNorm = 0.2912, lr_0 = 2.7214e-04
Loss = 3.6527e-03, PNorm = 150.7479, GNorm = 0.2269, lr_0 = 2.7195e-04
Loss = 4.1752e-03, PNorm = 150.7604, GNorm = 0.0726, lr_0 = 2.7177e-04
Loss = 4.1091e-03, PNorm = 150.7713, GNorm = 0.1752, lr_0 = 2.7158e-04
Loss = 5.0063e-03, PNorm = 150.7774, GNorm = 0.1827, lr_0 = 2.7139e-04
Loss = 6.5836e-03, PNorm = 150.7815, GNorm = 0.2069, lr_0 = 2.7121e-04
Loss = 4.1671e-03, PNorm = 150.7869, GNorm = 0.1963, lr_0 = 2.7102e-04
Loss = 3.4009e-03, PNorm = 150.7956, GNorm = 0.0684, lr_0 = 2.7084e-04
Loss = 3.6549e-03, PNorm = 150.8034, GNorm = 0.1195, lr_0 = 2.7065e-04
Loss = 3.2303e-03, PNorm = 150.8082, GNorm = 0.0967, lr_0 = 2.7047e-04
Loss = 3.3895e-03, PNorm = 150.8121, GNorm = 0.3989, lr_0 = 2.7028e-04
Loss = 3.7025e-03, PNorm = 150.8230, GNorm = 0.2172, lr_0 = 2.7010e-04
Loss = 3.4863e-03, PNorm = 150.8311, GNorm = 0.2573, lr_0 = 2.6991e-04
Loss = 4.3917e-03, PNorm = 150.8379, GNorm = 0.1604, lr_0 = 2.6973e-04
Loss = 3.3470e-03, PNorm = 150.8457, GNorm = 0.0766, lr_0 = 2.6954e-04
Loss = 3.7382e-03, PNorm = 150.8539, GNorm = 0.1918, lr_0 = 2.6936e-04
Loss = 3.8668e-03, PNorm = 150.8587, GNorm = 0.0611, lr_0 = 2.6917e-04
Loss = 4.2378e-03, PNorm = 150.8684, GNorm = 0.3567, lr_0 = 2.6899e-04
Loss = 3.5500e-03, PNorm = 150.8785, GNorm = 0.1669, lr_0 = 2.6880e-04
Loss = 6.6740e-03, PNorm = 150.8883, GNorm = 0.0928, lr_0 = 2.6862e-04
Loss = 2.9220e-03, PNorm = 150.9000, GNorm = 0.0555, lr_0 = 2.6844e-04
Loss = 3.3776e-03, PNorm = 150.9073, GNorm = 0.1351, lr_0 = 2.6825e-04
Validation mae = 0.478347
Epoch 18
Loss = 3.6488e-03, PNorm = 150.9112, GNorm = 0.0972, lr_0 = 2.6807e-04
Loss = 2.8684e-03, PNorm = 150.9146, GNorm = 0.1944, lr_0 = 2.6788e-04
Loss = 3.2438e-03, PNorm = 150.9214, GNorm = 0.2123, lr_0 = 2.6770e-04
Loss = 3.5285e-03, PNorm = 150.9275, GNorm = 0.1510, lr_0 = 2.6752e-04
Loss = 3.0702e-03, PNorm = 150.9341, GNorm = 0.0737, lr_0 = 2.6733e-04
Loss = 2.8635e-03, PNorm = 150.9379, GNorm = 0.0556, lr_0 = 2.6715e-04
Loss = 2.8056e-03, PNorm = 150.9429, GNorm = 0.1370, lr_0 = 2.6697e-04
Loss = 3.5523e-03, PNorm = 150.9463, GNorm = 0.1095, lr_0 = 2.6678e-04
Loss = 2.8421e-03, PNorm = 150.9510, GNorm = 0.0683, lr_0 = 2.6660e-04
Loss = 3.2608e-03, PNorm = 150.9539, GNorm = 0.3806, lr_0 = 2.6642e-04
Loss = 2.8387e-03, PNorm = 150.9614, GNorm = 0.1356, lr_0 = 2.6624e-04
Loss = 2.6436e-03, PNorm = 150.9671, GNorm = 0.1419, lr_0 = 2.6605e-04
Loss = 3.5865e-03, PNorm = 150.9736, GNorm = 0.0943, lr_0 = 2.6587e-04
Loss = 5.2739e-03, PNorm = 150.9835, GNorm = 0.1353, lr_0 = 2.6569e-04
Loss = 3.0465e-03, PNorm = 150.9911, GNorm = 0.0984, lr_0 = 2.6551e-04
Loss = 2.8173e-03, PNorm = 150.9954, GNorm = 0.0774, lr_0 = 2.6533e-04
Loss = 2.9289e-03, PNorm = 151.0006, GNorm = 0.2710, lr_0 = 2.6514e-04
Loss = 3.1190e-03, PNorm = 151.0046, GNorm = 0.1022, lr_0 = 2.6496e-04
Loss = 3.0056e-03, PNorm = 151.0095, GNorm = 0.1932, lr_0 = 2.6478e-04
Loss = 3.0188e-03, PNorm = 151.0158, GNorm = 0.3205, lr_0 = 2.6460e-04
Loss = 3.1418e-03, PNorm = 151.0211, GNorm = 0.1243, lr_0 = 2.6442e-04
Loss = 3.7349e-03, PNorm = 151.0300, GNorm = 0.1232, lr_0 = 2.6424e-04
Loss = 4.1544e-03, PNorm = 151.0383, GNorm = 0.1153, lr_0 = 2.6406e-04
Loss = 3.5734e-03, PNorm = 151.0483, GNorm = 0.0868, lr_0 = 2.6388e-04
Loss = 2.9151e-03, PNorm = 151.0553, GNorm = 0.0885, lr_0 = 2.6369e-04
Loss = 4.2190e-03, PNorm = 151.0597, GNorm = 0.1461, lr_0 = 2.6351e-04
Loss = 3.3375e-03, PNorm = 151.0670, GNorm = 0.0624, lr_0 = 2.6333e-04
Loss = 2.6308e-03, PNorm = 151.0745, GNorm = 0.2423, lr_0 = 2.6315e-04
Loss = 3.7514e-03, PNorm = 151.0801, GNorm = 0.2646, lr_0 = 2.6297e-04
Loss = 2.6485e-03, PNorm = 151.0857, GNorm = 0.2105, lr_0 = 2.6279e-04
Loss = 4.0417e-03, PNorm = 151.0928, GNorm = 0.2054, lr_0 = 2.6261e-04
Loss = 4.2432e-03, PNorm = 151.0974, GNorm = 0.2348, lr_0 = 2.6243e-04
Loss = 4.4715e-03, PNorm = 151.1063, GNorm = 0.2706, lr_0 = 2.6225e-04
Loss = 2.6813e-03, PNorm = 151.1151, GNorm = 0.1418, lr_0 = 2.6207e-04
Loss = 2.8365e-03, PNorm = 151.1221, GNorm = 0.2207, lr_0 = 2.6189e-04
Loss = 3.4730e-03, PNorm = 151.1277, GNorm = 0.2222, lr_0 = 2.6171e-04
Loss = 3.3288e-03, PNorm = 151.1358, GNorm = 0.1260, lr_0 = 2.6153e-04
Loss = 2.6940e-03, PNorm = 151.1436, GNorm = 0.1142, lr_0 = 2.6136e-04
Loss = 3.3306e-03, PNorm = 151.1502, GNorm = 0.1143, lr_0 = 2.6118e-04
Loss = 4.3992e-03, PNorm = 151.1532, GNorm = 0.1858, lr_0 = 2.6100e-04
Loss = 3.4653e-03, PNorm = 151.1587, GNorm = 0.2323, lr_0 = 2.6082e-04
Loss = 2.7886e-03, PNorm = 151.1675, GNorm = 0.1448, lr_0 = 2.6064e-04
Loss = 2.8353e-03, PNorm = 151.1740, GNorm = 0.1114, lr_0 = 2.6046e-04
Loss = 3.9713e-03, PNorm = 151.1798, GNorm = 0.0870, lr_0 = 2.6028e-04
Loss = 3.6331e-03, PNorm = 151.1865, GNorm = 0.1873, lr_0 = 2.6011e-04
Loss = 3.8750e-03, PNorm = 151.1963, GNorm = 0.0974, lr_0 = 2.5993e-04
Loss = 2.9161e-03, PNorm = 151.2033, GNorm = 0.1786, lr_0 = 2.5975e-04
Loss = 5.5878e-03, PNorm = 151.2115, GNorm = 0.2808, lr_0 = 2.5957e-04
Loss = 4.1711e-03, PNorm = 151.2194, GNorm = 0.1291, lr_0 = 2.5939e-04
Loss = 2.7650e-03, PNorm = 151.2280, GNorm = 0.0963, lr_0 = 2.5922e-04
Loss = 2.7520e-03, PNorm = 151.2350, GNorm = 0.1118, lr_0 = 2.5904e-04
Loss = 2.6417e-03, PNorm = 151.2400, GNorm = 0.1400, lr_0 = 2.5886e-04
Loss = 2.8251e-03, PNorm = 151.2464, GNorm = 0.1192, lr_0 = 2.5868e-04
Loss = 2.4860e-03, PNorm = 151.2530, GNorm = 0.0534, lr_0 = 2.5851e-04
Loss = 2.5766e-03, PNorm = 151.2572, GNorm = 0.0711, lr_0 = 2.5833e-04
Loss = 2.8672e-03, PNorm = 151.2627, GNorm = 0.0764, lr_0 = 2.5815e-04
Loss = 2.9926e-03, PNorm = 151.2690, GNorm = 0.2549, lr_0 = 2.5797e-04
Loss = 2.8709e-03, PNorm = 151.2763, GNorm = 0.2374, lr_0 = 2.5780e-04
Loss = 4.4752e-03, PNorm = 151.2849, GNorm = 0.1214, lr_0 = 2.5762e-04
Loss = 3.0557e-03, PNorm = 151.2903, GNorm = 0.1209, lr_0 = 2.5745e-04
Loss = 3.2630e-03, PNorm = 151.2940, GNorm = 0.2494, lr_0 = 2.5727e-04
Loss = 3.1231e-03, PNorm = 151.2987, GNorm = 0.1081, lr_0 = 2.5709e-04
Loss = 2.9708e-03, PNorm = 151.3040, GNorm = 0.2028, lr_0 = 2.5692e-04
Loss = 3.0902e-03, PNorm = 151.3101, GNorm = 0.1239, lr_0 = 2.5674e-04
Loss = 3.3798e-03, PNorm = 151.3179, GNorm = 0.2406, lr_0 = 2.5656e-04
Loss = 2.5232e-03, PNorm = 151.3223, GNorm = 0.1152, lr_0 = 2.5639e-04
Loss = 3.9128e-03, PNorm = 151.3270, GNorm = 0.1978, lr_0 = 2.5621e-04
Loss = 2.4451e-03, PNorm = 151.3322, GNorm = 0.3106, lr_0 = 2.5604e-04
Loss = 2.5219e-03, PNorm = 151.3414, GNorm = 0.1144, lr_0 = 2.5586e-04
Loss = 3.0875e-03, PNorm = 151.3502, GNorm = 0.2966, lr_0 = 2.5569e-04
Loss = 5.2083e-03, PNorm = 151.3568, GNorm = 0.4726, lr_0 = 2.5551e-04
Loss = 4.3550e-03, PNorm = 151.3653, GNorm = 0.2129, lr_0 = 2.5534e-04
Loss = 2.6514e-03, PNorm = 151.3726, GNorm = 0.1348, lr_0 = 2.5516e-04
Loss = 6.2438e-03, PNorm = 151.3780, GNorm = 0.3328, lr_0 = 2.5499e-04
Loss = 3.1307e-03, PNorm = 151.3826, GNorm = 0.3087, lr_0 = 2.5481e-04
Loss = 3.0461e-03, PNorm = 151.3890, GNorm = 0.0566, lr_0 = 2.5464e-04
Loss = 2.9572e-03, PNorm = 151.3951, GNorm = 0.1106, lr_0 = 2.5446e-04
Loss = 2.2787e-03, PNorm = 151.4001, GNorm = 0.1149, lr_0 = 2.5429e-04
Loss = 3.9643e-03, PNorm = 151.4090, GNorm = 0.1435, lr_0 = 2.5411e-04
Loss = 2.8880e-03, PNorm = 151.4158, GNorm = 0.2894, lr_0 = 2.5394e-04
Loss = 3.3573e-03, PNorm = 151.4212, GNorm = 0.1153, lr_0 = 2.5377e-04
Loss = 7.3563e-03, PNorm = 151.4268, GNorm = 0.4280, lr_0 = 2.5359e-04
Loss = 4.6833e-03, PNorm = 151.4346, GNorm = 0.5244, lr_0 = 2.5342e-04
Loss = 2.7023e-03, PNorm = 151.4423, GNorm = 0.1408, lr_0 = 2.5325e-04
Loss = 4.8552e-03, PNorm = 151.4519, GNorm = 0.3396, lr_0 = 2.5307e-04
Loss = 3.3436e-03, PNorm = 151.4615, GNorm = 0.2313, lr_0 = 2.5290e-04
Loss = 2.8673e-03, PNorm = 151.4713, GNorm = 0.1658, lr_0 = 2.5273e-04
Loss = 2.4036e-03, PNorm = 151.4798, GNorm = 0.2615, lr_0 = 2.5255e-04
Loss = 4.9848e-03, PNorm = 151.4858, GNorm = 0.1616, lr_0 = 2.5238e-04
Loss = 4.0866e-03, PNorm = 151.4903, GNorm = 0.1180, lr_0 = 2.5221e-04
Loss = 3.4370e-03, PNorm = 151.4959, GNorm = 0.3229, lr_0 = 2.5203e-04
Loss = 2.8574e-03, PNorm = 151.5017, GNorm = 0.1842, lr_0 = 2.5186e-04
Loss = 3.7858e-03, PNorm = 151.5110, GNorm = 0.1043, lr_0 = 2.5169e-04
Loss = 3.8508e-03, PNorm = 151.5165, GNorm = 0.2646, lr_0 = 2.5152e-04
Loss = 2.8711e-03, PNorm = 151.5240, GNorm = 0.1204, lr_0 = 2.5134e-04
Loss = 5.3448e-03, PNorm = 151.5311, GNorm = 0.2774, lr_0 = 2.5117e-04
Loss = 3.2601e-03, PNorm = 151.5378, GNorm = 0.2848, lr_0 = 2.5100e-04
Loss = 4.5381e-03, PNorm = 151.5429, GNorm = 0.1599, lr_0 = 2.5083e-04
Loss = 3.6439e-03, PNorm = 151.5473, GNorm = 0.1200, lr_0 = 2.5066e-04
Loss = 4.0975e-03, PNorm = 151.5567, GNorm = 0.0827, lr_0 = 2.5048e-04
Loss = 3.9637e-03, PNorm = 151.5640, GNorm = 0.2071, lr_0 = 2.5031e-04
Loss = 3.9319e-03, PNorm = 151.5720, GNorm = 0.1587, lr_0 = 2.5014e-04
Loss = 3.0977e-03, PNorm = 151.5791, GNorm = 0.2767, lr_0 = 2.4997e-04
Loss = 3.4794e-03, PNorm = 151.5878, GNorm = 0.0945, lr_0 = 2.4980e-04
Loss = 3.2657e-03, PNorm = 151.5934, GNorm = 0.2071, lr_0 = 2.4963e-04
Loss = 2.7737e-03, PNorm = 151.5999, GNorm = 0.0534, lr_0 = 2.4946e-04
Loss = 3.4662e-03, PNorm = 151.6063, GNorm = 0.1192, lr_0 = 2.4929e-04
Loss = 3.0331e-03, PNorm = 151.6149, GNorm = 0.1114, lr_0 = 2.4911e-04
Loss = 4.9607e-03, PNorm = 151.6213, GNorm = 0.1065, lr_0 = 2.4894e-04
Loss = 3.5325e-03, PNorm = 151.6307, GNorm = 0.2382, lr_0 = 2.4877e-04
Loss = 4.3835e-03, PNorm = 151.6383, GNorm = 0.1907, lr_0 = 2.4860e-04
Loss = 2.6573e-03, PNorm = 151.6471, GNorm = 0.2352, lr_0 = 2.4843e-04
Loss = 3.8699e-03, PNorm = 151.6524, GNorm = 0.2263, lr_0 = 2.4826e-04
Loss = 4.6802e-03, PNorm = 151.6579, GNorm = 0.0734, lr_0 = 2.4809e-04
Loss = 3.9982e-03, PNorm = 151.6659, GNorm = 0.0582, lr_0 = 2.4792e-04
Loss = 2.7694e-03, PNorm = 151.6764, GNorm = 0.1006, lr_0 = 2.4775e-04
Loss = 6.2188e-03, PNorm = 151.6842, GNorm = 0.1680, lr_0 = 2.4758e-04
Loss = 4.6566e-03, PNorm = 151.6915, GNorm = 0.1264, lr_0 = 2.4741e-04
Loss = 3.1770e-03, PNorm = 151.7021, GNorm = 0.5938, lr_0 = 2.4724e-04
Loss = 2.6938e-03, PNorm = 151.7073, GNorm = 0.2201, lr_0 = 2.4707e-04
Validation mae = 0.477159
Epoch 19
Loss = 2.3417e-03, PNorm = 151.7099, GNorm = 0.1245, lr_0 = 2.4690e-04
Loss = 2.3679e-03, PNorm = 151.7121, GNorm = 0.1594, lr_0 = 2.4674e-04
Loss = 2.5910e-03, PNorm = 151.7169, GNorm = 0.1854, lr_0 = 2.4657e-04
Loss = 3.0240e-03, PNorm = 151.7227, GNorm = 0.2351, lr_0 = 2.4640e-04
Loss = 2.5784e-03, PNorm = 151.7301, GNorm = 0.1132, lr_0 = 2.4623e-04
Loss = 2.4769e-03, PNorm = 151.7348, GNorm = 0.1185, lr_0 = 2.4606e-04
Loss = 2.1064e-03, PNorm = 151.7377, GNorm = 0.2367, lr_0 = 2.4589e-04
Loss = 3.3757e-03, PNorm = 151.7426, GNorm = 0.1227, lr_0 = 2.4572e-04
Loss = 2.5701e-03, PNorm = 151.7470, GNorm = 0.3091, lr_0 = 2.4556e-04
Loss = 3.4498e-03, PNorm = 151.7535, GNorm = 0.0722, lr_0 = 2.4539e-04
Loss = 2.6748e-03, PNorm = 151.7640, GNorm = 0.0597, lr_0 = 2.4522e-04
Loss = 3.8194e-03, PNorm = 151.7713, GNorm = 0.1163, lr_0 = 2.4505e-04
Loss = 3.4149e-03, PNorm = 151.7765, GNorm = 0.2756, lr_0 = 2.4488e-04
Loss = 3.5914e-03, PNorm = 151.7833, GNorm = 0.2515, lr_0 = 2.4472e-04
Loss = 2.5283e-03, PNorm = 151.7875, GNorm = 0.0525, lr_0 = 2.4455e-04
Loss = 2.4441e-03, PNorm = 151.7915, GNorm = 0.2625, lr_0 = 2.4438e-04
Loss = 2.8170e-03, PNorm = 151.7957, GNorm = 0.3892, lr_0 = 2.4421e-04
Loss = 2.4687e-03, PNorm = 151.8004, GNorm = 0.1325, lr_0 = 2.4405e-04
Loss = 2.7289e-03, PNorm = 151.8070, GNorm = 0.0940, lr_0 = 2.4388e-04
Loss = 2.4116e-03, PNorm = 151.8138, GNorm = 0.2348, lr_0 = 2.4371e-04
Loss = 2.7642e-03, PNorm = 151.8174, GNorm = 0.2268, lr_0 = 2.4354e-04
Loss = 2.3400e-03, PNorm = 151.8197, GNorm = 0.0968, lr_0 = 2.4338e-04
Loss = 2.8841e-03, PNorm = 151.8225, GNorm = 0.1158, lr_0 = 2.4321e-04
Loss = 2.6501e-03, PNorm = 151.8259, GNorm = 0.0652, lr_0 = 2.4304e-04
Loss = 2.5675e-03, PNorm = 151.8296, GNorm = 0.1426, lr_0 = 2.4288e-04
Loss = 2.9995e-03, PNorm = 151.8366, GNorm = 0.1051, lr_0 = 2.4271e-04
Loss = 2.7403e-03, PNorm = 151.8445, GNorm = 0.2837, lr_0 = 2.4254e-04
Loss = 2.8304e-03, PNorm = 151.8508, GNorm = 0.1911, lr_0 = 2.4238e-04
Loss = 2.6798e-03, PNorm = 151.8581, GNorm = 0.1775, lr_0 = 2.4221e-04
Loss = 2.9368e-03, PNorm = 151.8654, GNorm = 0.0662, lr_0 = 2.4205e-04
Loss = 2.2383e-03, PNorm = 151.8689, GNorm = 0.1998, lr_0 = 2.4188e-04
Loss = 2.8210e-03, PNorm = 151.8736, GNorm = 0.1443, lr_0 = 2.4171e-04
Loss = 2.5486e-03, PNorm = 151.8799, GNorm = 0.2227, lr_0 = 2.4155e-04
Loss = 2.5449e-03, PNorm = 151.8892, GNorm = 0.2942, lr_0 = 2.4138e-04
Loss = 3.7618e-03, PNorm = 151.8933, GNorm = 0.0964, lr_0 = 2.4122e-04
Loss = 2.8789e-03, PNorm = 151.8987, GNorm = 0.2488, lr_0 = 2.4105e-04
Loss = 3.3212e-03, PNorm = 151.9043, GNorm = 0.4640, lr_0 = 2.4089e-04
Loss = 2.7412e-03, PNorm = 151.9118, GNorm = 0.1186, lr_0 = 2.4072e-04
Loss = 2.9034e-03, PNorm = 151.9165, GNorm = 0.1578, lr_0 = 2.4056e-04
Loss = 6.2266e-03, PNorm = 151.9213, GNorm = 0.1830, lr_0 = 2.4039e-04
Loss = 5.3895e-03, PNorm = 151.9261, GNorm = 0.2832, lr_0 = 2.4023e-04
Loss = 2.7955e-03, PNorm = 151.9324, GNorm = 0.1538, lr_0 = 2.4006e-04
Loss = 2.8126e-03, PNorm = 151.9395, GNorm = 0.1537, lr_0 = 2.3990e-04
Loss = 2.6913e-03, PNorm = 151.9426, GNorm = 0.2285, lr_0 = 2.3974e-04
Loss = 2.0598e-03, PNorm = 151.9464, GNorm = 0.0364, lr_0 = 2.3957e-04
Loss = 2.7119e-03, PNorm = 151.9516, GNorm = 0.2552, lr_0 = 2.3941e-04
Loss = 2.2591e-03, PNorm = 151.9569, GNorm = 0.1305, lr_0 = 2.3924e-04
Loss = 2.7934e-03, PNorm = 151.9642, GNorm = 0.1799, lr_0 = 2.3908e-04
Loss = 2.6882e-03, PNorm = 151.9728, GNorm = 0.1950, lr_0 = 2.3892e-04
Loss = 2.0059e-03, PNorm = 151.9794, GNorm = 0.0677, lr_0 = 2.3875e-04
Loss = 2.6952e-03, PNorm = 151.9836, GNorm = 0.1223, lr_0 = 2.3859e-04
Loss = 3.0543e-03, PNorm = 151.9862, GNorm = 0.0895, lr_0 = 2.3842e-04
Loss = 2.1772e-03, PNorm = 151.9893, GNorm = 0.0648, lr_0 = 2.3826e-04
Loss = 2.0881e-03, PNorm = 151.9930, GNorm = 0.0507, lr_0 = 2.3810e-04
Loss = 3.4552e-03, PNorm = 151.9990, GNorm = 0.1638, lr_0 = 2.3794e-04
Loss = 4.2580e-03, PNorm = 152.0036, GNorm = 0.0844, lr_0 = 2.3777e-04
Loss = 2.2004e-03, PNorm = 152.0104, GNorm = 0.2311, lr_0 = 2.3761e-04
Loss = 2.9667e-03, PNorm = 152.0152, GNorm = 0.1114, lr_0 = 2.3745e-04
Loss = 2.4317e-03, PNorm = 152.0213, GNorm = 0.1478, lr_0 = 2.3728e-04
Loss = 3.6560e-03, PNorm = 152.0297, GNorm = 0.1123, lr_0 = 2.3712e-04
Loss = 2.3134e-03, PNorm = 152.0350, GNorm = 0.2598, lr_0 = 2.3696e-04
Loss = 2.2693e-03, PNorm = 152.0417, GNorm = 0.0995, lr_0 = 2.3680e-04
Loss = 3.4194e-03, PNorm = 152.0472, GNorm = 0.2121, lr_0 = 2.3663e-04
Loss = 3.4727e-03, PNorm = 152.0512, GNorm = 0.0963, lr_0 = 2.3647e-04
Loss = 4.2080e-03, PNorm = 152.0559, GNorm = 0.3400, lr_0 = 2.3631e-04
Loss = 3.4037e-03, PNorm = 152.0633, GNorm = 0.1894, lr_0 = 2.3615e-04
Loss = 2.6430e-03, PNorm = 152.0663, GNorm = 0.1462, lr_0 = 2.3599e-04
Loss = 3.2106e-03, PNorm = 152.0740, GNorm = 0.0578, lr_0 = 2.3582e-04
Loss = 4.2951e-03, PNorm = 152.0813, GNorm = 0.2753, lr_0 = 2.3566e-04
Loss = 5.8409e-03, PNorm = 152.0856, GNorm = 0.1060, lr_0 = 2.3550e-04
Loss = 2.1639e-03, PNorm = 152.0946, GNorm = 0.1026, lr_0 = 2.3534e-04
Loss = 2.6341e-03, PNorm = 152.1021, GNorm = 0.1689, lr_0 = 2.3518e-04
Loss = 3.5025e-03, PNorm = 152.1094, GNorm = 0.2337, lr_0 = 2.3502e-04
Loss = 3.1741e-03, PNorm = 152.1148, GNorm = 0.1540, lr_0 = 2.3486e-04
Loss = 3.3752e-03, PNorm = 152.1192, GNorm = 0.3013, lr_0 = 2.3470e-04
Loss = 4.0601e-03, PNorm = 152.1275, GNorm = 0.2146, lr_0 = 2.3454e-04
Loss = 2.3638e-03, PNorm = 152.1334, GNorm = 0.0575, lr_0 = 2.3437e-04
Loss = 3.3225e-03, PNorm = 152.1391, GNorm = 0.0710, lr_0 = 2.3421e-04
Loss = 4.1372e-03, PNorm = 152.1431, GNorm = 0.1649, lr_0 = 2.3405e-04
Loss = 7.0568e-03, PNorm = 152.1484, GNorm = 0.0507, lr_0 = 2.3389e-04
Loss = 2.5397e-03, PNorm = 152.1548, GNorm = 0.2339, lr_0 = 2.3373e-04
Loss = 3.2675e-03, PNorm = 152.1594, GNorm = 0.2036, lr_0 = 2.3357e-04
Loss = 2.9136e-03, PNorm = 152.1624, GNorm = 0.0620, lr_0 = 2.3341e-04
Loss = 3.7238e-03, PNorm = 152.1692, GNorm = 0.2250, lr_0 = 2.3325e-04
Loss = 4.8943e-03, PNorm = 152.1761, GNorm = 0.2016, lr_0 = 2.3309e-04
Loss = 4.2896e-03, PNorm = 152.1859, GNorm = 0.2497, lr_0 = 2.3293e-04
Loss = 3.4788e-03, PNorm = 152.1939, GNorm = 0.1382, lr_0 = 2.3277e-04
Loss = 2.5692e-03, PNorm = 152.2009, GNorm = 0.0856, lr_0 = 2.3261e-04
Loss = 2.7573e-03, PNorm = 152.2087, GNorm = 0.1918, lr_0 = 2.3246e-04
Loss = 3.5987e-03, PNorm = 152.2160, GNorm = 0.2161, lr_0 = 2.3230e-04
Loss = 3.6023e-03, PNorm = 152.2198, GNorm = 0.1979, lr_0 = 2.3214e-04
Loss = 3.6993e-03, PNorm = 152.2248, GNorm = 0.3968, lr_0 = 2.3198e-04
Loss = 2.2803e-03, PNorm = 152.2281, GNorm = 0.1160, lr_0 = 2.3182e-04
Loss = 3.4071e-03, PNorm = 152.2340, GNorm = 0.1291, lr_0 = 2.3166e-04
Loss = 3.8477e-03, PNorm = 152.2431, GNorm = 0.0566, lr_0 = 2.3150e-04
Loss = 2.5836e-03, PNorm = 152.2494, GNorm = 0.1954, lr_0 = 2.3134e-04
Loss = 3.7588e-03, PNorm = 152.2563, GNorm = 0.1294, lr_0 = 2.3118e-04
Loss = 3.6257e-03, PNorm = 152.2644, GNorm = 0.2222, lr_0 = 2.3103e-04
Loss = 3.3560e-03, PNorm = 152.2710, GNorm = 0.1712, lr_0 = 2.3087e-04
Loss = 2.9528e-03, PNorm = 152.2757, GNorm = 0.1104, lr_0 = 2.3071e-04
Loss = 2.5920e-03, PNorm = 152.2813, GNorm = 0.2385, lr_0 = 2.3055e-04
Loss = 2.2171e-03, PNorm = 152.2852, GNorm = 0.0765, lr_0 = 2.3039e-04
Loss = 4.3676e-03, PNorm = 152.2904, GNorm = 0.1537, lr_0 = 2.3024e-04
Loss = 2.1717e-03, PNorm = 152.2965, GNorm = 0.1153, lr_0 = 2.3008e-04
Loss = 2.8573e-03, PNorm = 152.3016, GNorm = 0.1221, lr_0 = 2.2992e-04
Loss = 2.9431e-03, PNorm = 152.3066, GNorm = 0.0720, lr_0 = 2.2976e-04
Loss = 2.1593e-03, PNorm = 152.3113, GNorm = 0.1820, lr_0 = 2.2961e-04
Loss = 2.3076e-03, PNorm = 152.3182, GNorm = 0.0645, lr_0 = 2.2945e-04
Loss = 3.6332e-03, PNorm = 152.3262, GNorm = 0.1427, lr_0 = 2.2929e-04
Loss = 2.2556e-03, PNorm = 152.3318, GNorm = 0.1262, lr_0 = 2.2913e-04
Loss = 3.2565e-03, PNorm = 152.3378, GNorm = 0.1072, lr_0 = 2.2898e-04
Loss = 2.3034e-03, PNorm = 152.3425, GNorm = 0.1914, lr_0 = 2.2882e-04
Loss = 2.1423e-03, PNorm = 152.3471, GNorm = 0.1142, lr_0 = 2.2866e-04
Loss = 2.8286e-03, PNorm = 152.3532, GNorm = 0.1368, lr_0 = 2.2851e-04
Loss = 1.9359e-03, PNorm = 152.3600, GNorm = 0.0590, lr_0 = 2.2835e-04
Loss = 3.8182e-03, PNorm = 152.3659, GNorm = 0.0716, lr_0 = 2.2819e-04
Loss = 2.7730e-03, PNorm = 152.3714, GNorm = 0.1874, lr_0 = 2.2804e-04
Loss = 3.7351e-03, PNorm = 152.3790, GNorm = 0.5800, lr_0 = 2.2788e-04
Loss = 2.3308e-03, PNorm = 152.3845, GNorm = 0.1786, lr_0 = 2.2773e-04
Loss = 2.7970e-03, PNorm = 152.3897, GNorm = 0.0852, lr_0 = 2.2757e-04
Validation mae = 0.477046
Epoch 20
Loss = 2.7641e-03, PNorm = 152.3934, GNorm = 0.0761, lr_0 = 2.2741e-04
Loss = 2.2918e-03, PNorm = 152.3972, GNorm = 0.2720, lr_0 = 2.2726e-04
Loss = 2.4294e-03, PNorm = 152.4000, GNorm = 0.1221, lr_0 = 2.2710e-04
Loss = 1.9888e-03, PNorm = 152.4035, GNorm = 0.0869, lr_0 = 2.2695e-04
Loss = 1.8232e-03, PNorm = 152.4047, GNorm = 0.1303, lr_0 = 2.2679e-04
Loss = 2.5063e-03, PNorm = 152.4081, GNorm = 0.1167, lr_0 = 2.2664e-04
Loss = 2.3088e-03, PNorm = 152.4129, GNorm = 0.2400, lr_0 = 2.2648e-04
Loss = 2.6420e-03, PNorm = 152.4151, GNorm = 0.1530, lr_0 = 2.2632e-04
Loss = 2.3142e-03, PNorm = 152.4211, GNorm = 0.1592, lr_0 = 2.2617e-04
Loss = 3.5787e-03, PNorm = 152.4243, GNorm = 0.1813, lr_0 = 2.2601e-04
Loss = 2.0803e-03, PNorm = 152.4277, GNorm = 0.2684, lr_0 = 2.2586e-04
Loss = 2.0215e-03, PNorm = 152.4318, GNorm = 0.2186, lr_0 = 2.2571e-04
Loss = 3.1367e-03, PNorm = 152.4368, GNorm = 0.4169, lr_0 = 2.2555e-04
Loss = 3.3434e-03, PNorm = 152.4396, GNorm = 0.2735, lr_0 = 2.2540e-04
Loss = 3.0718e-03, PNorm = 152.4465, GNorm = 0.3782, lr_0 = 2.2524e-04
Loss = 2.1993e-03, PNorm = 152.4510, GNorm = 0.2077, lr_0 = 2.2509e-04
Loss = 1.8971e-03, PNorm = 152.4568, GNorm = 0.1311, lr_0 = 2.2493e-04
Loss = 2.8627e-03, PNorm = 152.4613, GNorm = 0.0641, lr_0 = 2.2478e-04
Loss = 2.2104e-03, PNorm = 152.4662, GNorm = 0.0438, lr_0 = 2.2463e-04
Loss = 2.5782e-03, PNorm = 152.4708, GNorm = 0.0837, lr_0 = 2.2447e-04
Loss = 2.4459e-03, PNorm = 152.4727, GNorm = 0.1313, lr_0 = 2.2432e-04
Loss = 1.7279e-03, PNorm = 152.4769, GNorm = 0.0352, lr_0 = 2.2416e-04
Loss = 3.0690e-03, PNorm = 152.4811, GNorm = 0.0618, lr_0 = 2.2401e-04
Loss = 2.5751e-03, PNorm = 152.4846, GNorm = 0.2736, lr_0 = 2.2386e-04
Loss = 2.6646e-03, PNorm = 152.4880, GNorm = 0.1757, lr_0 = 2.2370e-04
Loss = 3.0245e-03, PNorm = 152.4956, GNorm = 0.2408, lr_0 = 2.2355e-04
Loss = 1.9559e-03, PNorm = 152.5054, GNorm = 0.0747, lr_0 = 2.2340e-04
Loss = 2.3044e-03, PNorm = 152.5104, GNorm = 0.1565, lr_0 = 2.2324e-04
Loss = 2.1135e-03, PNorm = 152.5115, GNorm = 0.1643, lr_0 = 2.2309e-04
Loss = 2.9713e-03, PNorm = 152.5144, GNorm = 0.0725, lr_0 = 2.2294e-04
Loss = 2.2122e-03, PNorm = 152.5158, GNorm = 0.0659, lr_0 = 2.2279e-04
Loss = 2.1857e-03, PNorm = 152.5171, GNorm = 0.1536, lr_0 = 2.2263e-04
Loss = 2.7590e-03, PNorm = 152.5228, GNorm = 0.1805, lr_0 = 2.2248e-04
Loss = 2.9836e-03, PNorm = 152.5252, GNorm = 0.2889, lr_0 = 2.2233e-04
Loss = 1.9873e-03, PNorm = 152.5328, GNorm = 0.1933, lr_0 = 2.2218e-04
Loss = 2.2688e-03, PNorm = 152.5374, GNorm = 0.0632, lr_0 = 2.2202e-04
Loss = 2.4812e-03, PNorm = 152.5426, GNorm = 0.1609, lr_0 = 2.2187e-04
Loss = 3.1543e-03, PNorm = 152.5473, GNorm = 0.1177, lr_0 = 2.2172e-04
Loss = 3.4239e-03, PNorm = 152.5512, GNorm = 0.0996, lr_0 = 2.2157e-04
Loss = 3.1475e-03, PNorm = 152.5523, GNorm = 0.1553, lr_0 = 2.2142e-04
Loss = 1.9641e-03, PNorm = 152.5535, GNorm = 0.1620, lr_0 = 2.2126e-04
Loss = 2.4275e-03, PNorm = 152.5573, GNorm = 0.0986, lr_0 = 2.2111e-04
Loss = 1.7094e-03, PNorm = 152.5603, GNorm = 0.1631, lr_0 = 2.2096e-04
Loss = 3.4622e-03, PNorm = 152.5679, GNorm = 0.0934, lr_0 = 2.2081e-04
Loss = 3.7783e-03, PNorm = 152.5742, GNorm = 0.2875, lr_0 = 2.2066e-04
Loss = 2.4557e-03, PNorm = 152.5787, GNorm = 0.1683, lr_0 = 2.2051e-04
Loss = 2.2303e-03, PNorm = 152.5817, GNorm = 0.0688, lr_0 = 2.2036e-04
Loss = 2.9147e-03, PNorm = 152.5868, GNorm = 0.0752, lr_0 = 2.2021e-04
Loss = 3.6099e-03, PNorm = 152.5933, GNorm = 0.3268, lr_0 = 2.2005e-04
Loss = 2.0627e-03, PNorm = 152.6001, GNorm = 0.0674, lr_0 = 2.1990e-04
Loss = 2.1285e-03, PNorm = 152.6057, GNorm = 0.0728, lr_0 = 2.1975e-04
Loss = 1.7515e-03, PNorm = 152.6119, GNorm = 0.1257, lr_0 = 2.1960e-04
Loss = 1.9744e-03, PNorm = 152.6170, GNorm = 0.1316, lr_0 = 2.1945e-04
Loss = 3.2322e-03, PNorm = 152.6214, GNorm = 0.2142, lr_0 = 2.1930e-04
Loss = 2.0833e-03, PNorm = 152.6237, GNorm = 0.1461, lr_0 = 2.1915e-04
Loss = 3.7583e-03, PNorm = 152.6254, GNorm = 0.1611, lr_0 = 2.1900e-04
Loss = 2.7821e-03, PNorm = 152.6281, GNorm = 0.3210, lr_0 = 2.1885e-04
Loss = 1.9731e-03, PNorm = 152.6323, GNorm = 0.1458, lr_0 = 2.1870e-04
Loss = 1.7906e-03, PNorm = 152.6354, GNorm = 0.1676, lr_0 = 2.1855e-04
Loss = 2.7097e-03, PNorm = 152.6410, GNorm = 0.0431, lr_0 = 2.1840e-04
Loss = 2.1419e-03, PNorm = 152.6480, GNorm = 0.0932, lr_0 = 2.1825e-04
Loss = 1.8435e-03, PNorm = 152.6505, GNorm = 0.0990, lr_0 = 2.1810e-04
Loss = 3.4818e-03, PNorm = 152.6537, GNorm = 0.1492, lr_0 = 2.1795e-04
Loss = 2.0246e-03, PNorm = 152.6598, GNorm = 0.1013, lr_0 = 2.1780e-04
Loss = 2.9816e-03, PNorm = 152.6627, GNorm = 0.1255, lr_0 = 2.1765e-04
Loss = 4.2785e-03, PNorm = 152.6671, GNorm = 0.1571, lr_0 = 2.1751e-04
Loss = 2.8482e-03, PNorm = 152.6704, GNorm = 0.1088, lr_0 = 2.1736e-04
Loss = 2.3678e-03, PNorm = 152.6755, GNorm = 0.0975, lr_0 = 2.1721e-04
Loss = 2.3343e-03, PNorm = 152.6826, GNorm = 0.1502, lr_0 = 2.1706e-04
Loss = 2.2892e-03, PNorm = 152.6895, GNorm = 0.5884, lr_0 = 2.1691e-04
Loss = 2.6947e-03, PNorm = 152.6930, GNorm = 0.0339, lr_0 = 2.1676e-04
Loss = 2.4956e-03, PNorm = 152.7004, GNorm = 0.0588, lr_0 = 2.1661e-04
Loss = 2.6944e-03, PNorm = 152.7066, GNorm = 0.0859, lr_0 = 2.1646e-04
Loss = 2.6361e-03, PNorm = 152.7101, GNorm = 0.0909, lr_0 = 2.1632e-04
Loss = 1.9054e-03, PNorm = 152.7135, GNorm = 0.2177, lr_0 = 2.1617e-04
Loss = 2.4448e-03, PNorm = 152.7148, GNorm = 0.1475, lr_0 = 2.1602e-04
Loss = 1.8080e-03, PNorm = 152.7207, GNorm = 0.0764, lr_0 = 2.1587e-04
Loss = 3.4241e-03, PNorm = 152.7256, GNorm = 0.1262, lr_0 = 2.1572e-04
Loss = 2.2851e-03, PNorm = 152.7321, GNorm = 0.1450, lr_0 = 2.1558e-04
Loss = 7.0977e-03, PNorm = 152.7378, GNorm = 0.1254, lr_0 = 2.1543e-04
Loss = 2.3335e-03, PNorm = 152.7473, GNorm = 0.2563, lr_0 = 2.1528e-04
Loss = 4.3174e-03, PNorm = 152.7530, GNorm = 0.1409, lr_0 = 2.1513e-04
Loss = 2.1938e-03, PNorm = 152.7574, GNorm = 0.1695, lr_0 = 2.1499e-04
Loss = 2.5812e-03, PNorm = 152.7633, GNorm = 0.0656, lr_0 = 2.1484e-04
Loss = 2.0402e-03, PNorm = 152.7670, GNorm = 0.0402, lr_0 = 2.1469e-04
Loss = 2.5979e-03, PNorm = 152.7699, GNorm = 0.2318, lr_0 = 2.1454e-04
Loss = 2.1006e-03, PNorm = 152.7741, GNorm = 0.1686, lr_0 = 2.1440e-04
Loss = 2.5067e-03, PNorm = 152.7791, GNorm = 0.0827, lr_0 = 2.1425e-04
Loss = 6.8467e-03, PNorm = 152.7847, GNorm = 0.1309, lr_0 = 2.1410e-04
Loss = 3.0162e-03, PNorm = 152.7889, GNorm = 0.1778, lr_0 = 2.1396e-04
Loss = 2.0475e-03, PNorm = 152.7954, GNorm = 0.1900, lr_0 = 2.1381e-04
Loss = 4.5855e-03, PNorm = 152.7988, GNorm = 0.2849, lr_0 = 2.1366e-04
Loss = 2.8140e-03, PNorm = 152.8032, GNorm = 0.1691, lr_0 = 2.1352e-04
Loss = 1.9342e-03, PNorm = 152.8094, GNorm = 0.0990, lr_0 = 2.1337e-04
Loss = 2.7863e-03, PNorm = 152.8149, GNorm = 0.1440, lr_0 = 2.1323e-04
Loss = 2.0742e-03, PNorm = 152.8189, GNorm = 0.0761, lr_0 = 2.1308e-04
Loss = 3.8708e-03, PNorm = 152.8232, GNorm = 0.0909, lr_0 = 2.1293e-04
Loss = 2.2276e-03, PNorm = 152.8305, GNorm = 0.1226, lr_0 = 2.1279e-04
Loss = 2.3823e-03, PNorm = 152.8362, GNorm = 0.2695, lr_0 = 2.1264e-04
Loss = 3.1909e-03, PNorm = 152.8409, GNorm = 0.0503, lr_0 = 2.1250e-04
Loss = 2.4086e-03, PNorm = 152.8467, GNorm = 0.1682, lr_0 = 2.1235e-04
Loss = 1.8261e-03, PNorm = 152.8527, GNorm = 0.1608, lr_0 = 2.1221e-04
Loss = 1.5095e-03, PNorm = 152.8573, GNorm = 0.0566, lr_0 = 2.1206e-04
Loss = 3.4124e-03, PNorm = 152.8635, GNorm = 0.0678, lr_0 = 2.1191e-04
Loss = 2.4928e-03, PNorm = 152.8674, GNorm = 0.1886, lr_0 = 2.1177e-04
Loss = 2.2430e-03, PNorm = 152.8727, GNorm = 0.0774, lr_0 = 2.1162e-04
Loss = 2.7042e-03, PNorm = 152.8752, GNorm = 0.0501, lr_0 = 2.1148e-04
Loss = 2.1991e-03, PNorm = 152.8804, GNorm = 0.1470, lr_0 = 2.1133e-04
Loss = 1.9706e-03, PNorm = 152.8854, GNorm = 0.1197, lr_0 = 2.1119e-04
Loss = 2.1424e-03, PNorm = 152.8900, GNorm = 0.0716, lr_0 = 2.1104e-04
Loss = 2.3595e-03, PNorm = 152.8968, GNorm = 0.0744, lr_0 = 2.1090e-04
Loss = 1.7487e-03, PNorm = 152.9010, GNorm = 0.1963, lr_0 = 2.1076e-04
Loss = 2.0035e-03, PNorm = 152.9054, GNorm = 0.1865, lr_0 = 2.1061e-04
Loss = 2.0914e-03, PNorm = 152.9102, GNorm = 0.0373, lr_0 = 2.1047e-04
Loss = 2.2661e-03, PNorm = 152.9140, GNorm = 0.0806, lr_0 = 2.1032e-04
Loss = 4.0808e-03, PNorm = 152.9152, GNorm = 0.1355, lr_0 = 2.1018e-04
Loss = 2.7989e-03, PNorm = 152.9174, GNorm = 0.1009, lr_0 = 2.1003e-04
Loss = 4.0360e-03, PNorm = 152.9215, GNorm = 0.0408, lr_0 = 2.0989e-04
Loss = 2.2987e-03, PNorm = 152.9291, GNorm = 0.3195, lr_0 = 2.0975e-04
Loss = 2.9126e-03, PNorm = 152.9350, GNorm = 0.1503, lr_0 = 2.0960e-04
Validation mae = 0.476474
Epoch 21
Loss = 1.6864e-03, PNorm = 152.9398, GNorm = 0.0618, lr_0 = 2.0946e-04
Loss = 1.8745e-03, PNorm = 152.9420, GNorm = 0.0715, lr_0 = 2.0932e-04
Loss = 1.6037e-03, PNorm = 152.9463, GNorm = 0.3381, lr_0 = 2.0917e-04
Loss = 1.6524e-03, PNorm = 152.9494, GNorm = 0.0649, lr_0 = 2.0903e-04
Loss = 2.6150e-03, PNorm = 152.9522, GNorm = 0.0886, lr_0 = 2.0889e-04
Loss = 2.2085e-03, PNorm = 152.9556, GNorm = 0.2912, lr_0 = 2.0874e-04
Loss = 1.9112e-03, PNorm = 152.9594, GNorm = 0.1725, lr_0 = 2.0860e-04
Loss = 2.6651e-03, PNorm = 152.9640, GNorm = 0.2809, lr_0 = 2.0846e-04
Loss = 2.6990e-03, PNorm = 152.9664, GNorm = 0.1628, lr_0 = 2.0831e-04
Loss = 2.0903e-03, PNorm = 152.9692, GNorm = 0.1439, lr_0 = 2.0817e-04
Loss = 1.4692e-03, PNorm = 152.9747, GNorm = 0.2216, lr_0 = 2.0803e-04
Loss = 2.4623e-03, PNorm = 152.9808, GNorm = 0.0981, lr_0 = 2.0789e-04
Loss = 1.9036e-03, PNorm = 152.9831, GNorm = 0.2340, lr_0 = 2.0774e-04
Loss = 2.3860e-03, PNorm = 152.9853, GNorm = 0.1460, lr_0 = 2.0760e-04
Loss = 2.6514e-03, PNorm = 152.9898, GNorm = 0.0789, lr_0 = 2.0746e-04
Loss = 3.2896e-03, PNorm = 152.9952, GNorm = 0.2469, lr_0 = 2.0732e-04
Loss = 2.7485e-03, PNorm = 152.9958, GNorm = 0.0745, lr_0 = 2.0718e-04
Loss = 4.1495e-03, PNorm = 153.0002, GNorm = 0.0971, lr_0 = 2.0703e-04
Loss = 1.6708e-03, PNorm = 153.0065, GNorm = 0.1733, lr_0 = 2.0689e-04
Loss = 2.3490e-03, PNorm = 153.0132, GNorm = 0.1193, lr_0 = 2.0675e-04
Loss = 1.3435e-03, PNorm = 153.0176, GNorm = 0.1024, lr_0 = 2.0661e-04
Loss = 1.9724e-03, PNorm = 153.0207, GNorm = 0.3608, lr_0 = 2.0647e-04
Loss = 2.0431e-03, PNorm = 153.0224, GNorm = 0.0576, lr_0 = 2.0633e-04
Loss = 2.0811e-03, PNorm = 153.0273, GNorm = 0.0765, lr_0 = 2.0618e-04
Loss = 1.4652e-03, PNorm = 153.0293, GNorm = 0.0625, lr_0 = 2.0604e-04
Loss = 2.2153e-03, PNorm = 153.0323, GNorm = 0.2346, lr_0 = 2.0590e-04
Loss = 3.0671e-03, PNorm = 153.0377, GNorm = 0.0607, lr_0 = 2.0576e-04
Loss = 1.9750e-03, PNorm = 153.0422, GNorm = 0.2517, lr_0 = 2.0562e-04
Loss = 2.5897e-03, PNorm = 153.0477, GNorm = 0.1559, lr_0 = 2.0548e-04
Loss = 2.8139e-03, PNorm = 153.0523, GNorm = 0.0594, lr_0 = 2.0534e-04
Loss = 3.7655e-03, PNorm = 153.0540, GNorm = 0.1250, lr_0 = 2.0520e-04
Loss = 3.0676e-03, PNorm = 153.0548, GNorm = 0.0985, lr_0 = 2.0506e-04
Loss = 2.5444e-03, PNorm = 153.0601, GNorm = 0.1159, lr_0 = 2.0492e-04
Loss = 1.7619e-03, PNorm = 153.0631, GNorm = 0.1085, lr_0 = 2.0478e-04
Loss = 1.6100e-03, PNorm = 153.0685, GNorm = 0.1504, lr_0 = 2.0464e-04
Loss = 1.7607e-03, PNorm = 153.0717, GNorm = 0.1068, lr_0 = 2.0450e-04
Loss = 1.9518e-03, PNorm = 153.0754, GNorm = 0.1057, lr_0 = 2.0436e-04
Loss = 1.6905e-03, PNorm = 153.0802, GNorm = 0.1819, lr_0 = 2.0422e-04
Loss = 3.3809e-03, PNorm = 153.0829, GNorm = 0.5933, lr_0 = 2.0408e-04
Loss = 1.4916e-03, PNorm = 153.0833, GNorm = 0.1040, lr_0 = 2.0394e-04
Loss = 1.9721e-03, PNorm = 153.0881, GNorm = 0.2436, lr_0 = 2.0380e-04
Loss = 2.3244e-03, PNorm = 153.0920, GNorm = 0.1701, lr_0 = 2.0366e-04
Loss = 1.6309e-03, PNorm = 153.0963, GNorm = 0.0853, lr_0 = 2.0352e-04
Loss = 1.8128e-03, PNorm = 153.1004, GNorm = 0.1718, lr_0 = 2.0338e-04
Loss = 1.9048e-03, PNorm = 153.1042, GNorm = 0.0995, lr_0 = 2.0324e-04
Loss = 2.0930e-03, PNorm = 153.1083, GNorm = 0.0464, lr_0 = 2.0310e-04
Loss = 2.3211e-03, PNorm = 153.1119, GNorm = 0.1639, lr_0 = 2.0296e-04
Loss = 2.8579e-03, PNorm = 153.1158, GNorm = 0.0891, lr_0 = 2.0282e-04
Loss = 1.4117e-03, PNorm = 153.1183, GNorm = 0.0649, lr_0 = 2.0268e-04
Loss = 1.9669e-03, PNorm = 153.1213, GNorm = 0.1537, lr_0 = 2.0254e-04
Loss = 2.0441e-03, PNorm = 153.1243, GNorm = 0.1857, lr_0 = 2.0240e-04
Loss = 2.0458e-03, PNorm = 153.1295, GNorm = 0.0482, lr_0 = 2.0227e-04
Loss = 1.8365e-03, PNorm = 153.1331, GNorm = 0.1043, lr_0 = 2.0213e-04
Loss = 1.7751e-03, PNorm = 153.1359, GNorm = 0.0579, lr_0 = 2.0199e-04
Loss = 3.1907e-03, PNorm = 153.1369, GNorm = 0.5030, lr_0 = 2.0185e-04
Loss = 2.1022e-03, PNorm = 153.1397, GNorm = 0.2419, lr_0 = 2.0171e-04
Loss = 2.8003e-03, PNorm = 153.1461, GNorm = 0.2058, lr_0 = 2.0157e-04
Loss = 2.9360e-03, PNorm = 153.1518, GNorm = 0.0978, lr_0 = 2.0144e-04
Loss = 2.2042e-03, PNorm = 153.1577, GNorm = 0.1829, lr_0 = 2.0130e-04
Loss = 1.5529e-03, PNorm = 153.1609, GNorm = 0.2722, lr_0 = 2.0116e-04
Loss = 2.7405e-03, PNorm = 153.1628, GNorm = 0.1366, lr_0 = 2.0102e-04
Loss = 1.9684e-03, PNorm = 153.1670, GNorm = 0.0605, lr_0 = 2.0088e-04
Loss = 1.7747e-03, PNorm = 153.1741, GNorm = 0.1378, lr_0 = 2.0075e-04
Loss = 2.1338e-03, PNorm = 153.1781, GNorm = 0.2234, lr_0 = 2.0061e-04
Loss = 1.7923e-03, PNorm = 153.1817, GNorm = 0.1078, lr_0 = 2.0047e-04
Loss = 2.0155e-03, PNorm = 153.1839, GNorm = 0.0901, lr_0 = 2.0033e-04
Loss = 4.7915e-03, PNorm = 153.1880, GNorm = 0.0605, lr_0 = 2.0020e-04
Loss = 5.9447e-03, PNorm = 153.1943, GNorm = 0.1562, lr_0 = 2.0006e-04
Loss = 1.7628e-03, PNorm = 153.1964, GNorm = 0.0706, lr_0 = 1.9992e-04
Loss = 1.7967e-03, PNorm = 153.2007, GNorm = 0.2151, lr_0 = 1.9979e-04
Loss = 1.5744e-03, PNorm = 153.2067, GNorm = 0.1267, lr_0 = 1.9965e-04
Loss = 4.2908e-03, PNorm = 153.2124, GNorm = 0.1148, lr_0 = 1.9951e-04
Loss = 2.6881e-03, PNorm = 153.2170, GNorm = 0.2645, lr_0 = 1.9938e-04
Loss = 1.5572e-03, PNorm = 153.2218, GNorm = 0.0984, lr_0 = 1.9924e-04
Loss = 2.0394e-03, PNorm = 153.2262, GNorm = 0.0679, lr_0 = 1.9910e-04
Loss = 3.0597e-03, PNorm = 153.2304, GNorm = 0.0580, lr_0 = 1.9897e-04
Loss = 1.6863e-03, PNorm = 153.2349, GNorm = 0.1561, lr_0 = 1.9883e-04
Loss = 1.4617e-03, PNorm = 153.2383, GNorm = 0.0888, lr_0 = 1.9869e-04
Loss = 1.7431e-03, PNorm = 153.2427, GNorm = 0.1331, lr_0 = 1.9856e-04
Loss = 3.4012e-03, PNorm = 153.2452, GNorm = 0.0681, lr_0 = 1.9842e-04
Loss = 1.6148e-03, PNorm = 153.2478, GNorm = 0.1405, lr_0 = 1.9829e-04
Loss = 2.0714e-03, PNorm = 153.2547, GNorm = 0.1886, lr_0 = 1.9815e-04
Loss = 2.3103e-03, PNorm = 153.2600, GNorm = 0.1542, lr_0 = 1.9801e-04
Loss = 3.1855e-03, PNorm = 153.2623, GNorm = 0.1988, lr_0 = 1.9788e-04
Loss = 2.3768e-03, PNorm = 153.2666, GNorm = 0.1115, lr_0 = 1.9774e-04
Loss = 2.8895e-03, PNorm = 153.2701, GNorm = 0.0847, lr_0 = 1.9761e-04
Loss = 1.8488e-03, PNorm = 153.2735, GNorm = 0.1796, lr_0 = 1.9747e-04
Loss = 3.5403e-03, PNorm = 153.2776, GNorm = 0.1003, lr_0 = 1.9734e-04
Loss = 1.5732e-03, PNorm = 153.2823, GNorm = 0.0507, lr_0 = 1.9720e-04
Loss = 4.1261e-03, PNorm = 153.2860, GNorm = 0.0967, lr_0 = 1.9707e-04
Loss = 2.3706e-03, PNorm = 153.2901, GNorm = 0.1603, lr_0 = 1.9693e-04
Loss = 3.4424e-03, PNorm = 153.2983, GNorm = 0.2523, lr_0 = 1.9680e-04
Loss = 1.9129e-03, PNorm = 153.3039, GNorm = 0.1218, lr_0 = 1.9666e-04
Loss = 1.8886e-03, PNorm = 153.3073, GNorm = 0.1137, lr_0 = 1.9653e-04
Loss = 3.6424e-03, PNorm = 153.3107, GNorm = 0.0949, lr_0 = 1.9639e-04
Loss = 3.0123e-03, PNorm = 153.3142, GNorm = 0.1471, lr_0 = 1.9626e-04
Loss = 2.5379e-03, PNorm = 153.3214, GNorm = 0.0780, lr_0 = 1.9612e-04
Loss = 2.7053e-03, PNorm = 153.3267, GNorm = 0.1989, lr_0 = 1.9599e-04
Loss = 1.8327e-03, PNorm = 153.3338, GNorm = 0.1620, lr_0 = 1.9585e-04
Loss = 2.6614e-03, PNorm = 153.3377, GNorm = 0.1562, lr_0 = 1.9572e-04
Loss = 2.0510e-03, PNorm = 153.3442, GNorm = 0.0459, lr_0 = 1.9559e-04
Loss = 1.9256e-03, PNorm = 153.3492, GNorm = 0.2652, lr_0 = 1.9545e-04
Loss = 1.8162e-03, PNorm = 153.3522, GNorm = 0.1025, lr_0 = 1.9532e-04
Loss = 2.8223e-03, PNorm = 153.3544, GNorm = 0.0527, lr_0 = 1.9518e-04
Loss = 1.9112e-03, PNorm = 153.3595, GNorm = 0.0915, lr_0 = 1.9505e-04
Loss = 1.6348e-03, PNorm = 153.3663, GNorm = 0.1241, lr_0 = 1.9492e-04
Loss = 4.0074e-03, PNorm = 153.3692, GNorm = 0.1425, lr_0 = 1.9478e-04
Loss = 2.9915e-03, PNorm = 153.3730, GNorm = 0.0473, lr_0 = 1.9465e-04
Loss = 1.6446e-03, PNorm = 153.3765, GNorm = 0.0771, lr_0 = 1.9452e-04
Loss = 2.7723e-03, PNorm = 153.3810, GNorm = 0.0322, lr_0 = 1.9438e-04
Loss = 6.2650e-03, PNorm = 153.3845, GNorm = 0.0892, lr_0 = 1.9425e-04
Loss = 2.9646e-03, PNorm = 153.3883, GNorm = 0.0982, lr_0 = 1.9412e-04
Loss = 2.7734e-03, PNorm = 153.3929, GNorm = 0.2611, lr_0 = 1.9398e-04
Loss = 1.8406e-03, PNorm = 153.3964, GNorm = 0.2952, lr_0 = 1.9385e-04
Loss = 1.7091e-03, PNorm = 153.4040, GNorm = 0.1578, lr_0 = 1.9372e-04
Loss = 2.3703e-03, PNorm = 153.4110, GNorm = 0.1654, lr_0 = 1.9359e-04
Loss = 1.6897e-03, PNorm = 153.4163, GNorm = 0.1260, lr_0 = 1.9345e-04
Loss = 2.1870e-03, PNorm = 153.4195, GNorm = 0.1248, lr_0 = 1.9332e-04
Loss = 2.9120e-03, PNorm = 153.4228, GNorm = 0.1474, lr_0 = 1.9319e-04
Loss = 2.6531e-03, PNorm = 153.4265, GNorm = 0.2966, lr_0 = 1.9306e-04
Validation mae = 0.478004
Epoch 22
Loss = 1.6649e-03, PNorm = 153.4299, GNorm = 0.1499, lr_0 = 1.9292e-04
Loss = 1.8251e-03, PNorm = 153.4322, GNorm = 0.0531, lr_0 = 1.9279e-04
Loss = 1.9473e-03, PNorm = 153.4364, GNorm = 0.0398, lr_0 = 1.9266e-04
Loss = 1.5797e-03, PNorm = 153.4405, GNorm = 0.0677, lr_0 = 1.9253e-04
Loss = 1.4822e-03, PNorm = 153.4442, GNorm = 0.1448, lr_0 = 1.9240e-04
Loss = 1.6547e-03, PNorm = 153.4473, GNorm = 0.0889, lr_0 = 1.9226e-04
Loss = 1.6892e-03, PNorm = 153.4495, GNorm = 0.0244, lr_0 = 1.9213e-04
Loss = 2.3262e-03, PNorm = 153.4516, GNorm = 0.1506, lr_0 = 1.9200e-04
Loss = 1.4689e-03, PNorm = 153.4549, GNorm = 0.0669, lr_0 = 1.9187e-04
Loss = 2.2034e-03, PNorm = 153.4568, GNorm = 0.0350, lr_0 = 1.9174e-04
Loss = 1.5244e-03, PNorm = 153.4592, GNorm = 0.1338, lr_0 = 1.9161e-04
Loss = 2.3718e-03, PNorm = 153.4609, GNorm = 0.0873, lr_0 = 1.9148e-04
Loss = 2.0057e-03, PNorm = 153.4644, GNorm = 0.0338, lr_0 = 1.9134e-04
Loss = 1.9788e-03, PNorm = 153.4668, GNorm = 0.1888, lr_0 = 1.9121e-04
Loss = 1.8881e-03, PNorm = 153.4735, GNorm = 0.2357, lr_0 = 1.9108e-04
Loss = 1.6620e-03, PNorm = 153.4789, GNorm = 0.0854, lr_0 = 1.9095e-04
Loss = 4.5457e-03, PNorm = 153.4812, GNorm = 0.1364, lr_0 = 1.9082e-04
Loss = 2.2893e-03, PNorm = 153.4837, GNorm = 0.0795, lr_0 = 1.9069e-04
Loss = 1.5510e-03, PNorm = 153.4851, GNorm = 0.0741, lr_0 = 1.9056e-04
Loss = 3.3347e-03, PNorm = 153.4862, GNorm = 0.0703, lr_0 = 1.9043e-04
Loss = 2.5791e-03, PNorm = 153.4894, GNorm = 0.2041, lr_0 = 1.9030e-04
Loss = 1.3854e-03, PNorm = 153.4929, GNorm = 0.0674, lr_0 = 1.9017e-04
Loss = 1.7944e-03, PNorm = 153.4973, GNorm = 0.1154, lr_0 = 1.9004e-04
Loss = 3.3511e-03, PNorm = 153.4990, GNorm = 0.1046, lr_0 = 1.8991e-04
Loss = 1.8387e-03, PNorm = 153.5014, GNorm = 0.0802, lr_0 = 1.8978e-04
Loss = 2.6546e-03, PNorm = 153.5047, GNorm = 0.2087, lr_0 = 1.8965e-04
Loss = 3.0981e-03, PNorm = 153.5077, GNorm = 0.2470, lr_0 = 1.8952e-04
Loss = 2.1545e-03, PNorm = 153.5123, GNorm = 0.2165, lr_0 = 1.8939e-04
Loss = 1.4891e-03, PNorm = 153.5155, GNorm = 0.1457, lr_0 = 1.8926e-04
Loss = 1.9322e-03, PNorm = 153.5178, GNorm = 0.0815, lr_0 = 1.8913e-04
Loss = 2.0319e-03, PNorm = 153.5204, GNorm = 0.0997, lr_0 = 1.8900e-04
Loss = 1.5674e-03, PNorm = 153.5240, GNorm = 0.0872, lr_0 = 1.8887e-04
Loss = 2.2710e-03, PNorm = 153.5295, GNorm = 0.1738, lr_0 = 1.8874e-04
Loss = 1.3883e-03, PNorm = 153.5334, GNorm = 0.0329, lr_0 = 1.8861e-04
Loss = 2.8596e-03, PNorm = 153.5374, GNorm = 0.1881, lr_0 = 1.8848e-04
Loss = 2.4984e-03, PNorm = 153.5406, GNorm = 0.1498, lr_0 = 1.8835e-04
Loss = 2.2381e-03, PNorm = 153.5424, GNorm = 0.1659, lr_0 = 1.8822e-04
Loss = 1.9447e-03, PNorm = 153.5434, GNorm = 0.2866, lr_0 = 1.8809e-04
Loss = 2.1763e-03, PNorm = 153.5454, GNorm = 0.0515, lr_0 = 1.8797e-04
Loss = 1.5979e-03, PNorm = 153.5470, GNorm = 0.0483, lr_0 = 1.8784e-04
Loss = 1.6935e-03, PNorm = 153.5500, GNorm = 0.0574, lr_0 = 1.8771e-04
Loss = 1.3088e-03, PNorm = 153.5525, GNorm = 0.1584, lr_0 = 1.8758e-04
Loss = 1.3999e-03, PNorm = 153.5574, GNorm = 0.1543, lr_0 = 1.8745e-04
Loss = 1.4532e-03, PNorm = 153.5616, GNorm = 0.2346, lr_0 = 1.8732e-04
Loss = 1.4834e-03, PNorm = 153.5646, GNorm = 0.2047, lr_0 = 1.8719e-04
Loss = 1.3426e-03, PNorm = 153.5657, GNorm = 0.0736, lr_0 = 1.8707e-04
Loss = 2.0939e-03, PNorm = 153.5693, GNorm = 0.1334, lr_0 = 1.8694e-04
Loss = 1.7403e-03, PNorm = 153.5756, GNorm = 0.1753, lr_0 = 1.8681e-04
Loss = 1.7875e-03, PNorm = 153.5798, GNorm = 0.0818, lr_0 = 1.8668e-04
Loss = 2.5481e-03, PNorm = 153.5827, GNorm = 0.1888, lr_0 = 1.8655e-04
Loss = 1.5044e-03, PNorm = 153.5855, GNorm = 0.0827, lr_0 = 1.8643e-04
Loss = 2.1900e-03, PNorm = 153.5882, GNorm = 0.5770, lr_0 = 1.8630e-04
Loss = 1.8534e-03, PNorm = 153.5906, GNorm = 0.1207, lr_0 = 1.8617e-04
Loss = 1.7199e-03, PNorm = 153.5929, GNorm = 0.0731, lr_0 = 1.8604e-04
Loss = 2.3329e-03, PNorm = 153.5959, GNorm = 0.1394, lr_0 = 1.8592e-04
Loss = 1.2216e-03, PNorm = 153.5985, GNorm = 0.0749, lr_0 = 1.8579e-04
Loss = 2.2397e-03, PNorm = 153.6005, GNorm = 0.1722, lr_0 = 1.8566e-04
Loss = 1.5344e-03, PNorm = 153.6031, GNorm = 0.4255, lr_0 = 1.8553e-04
Loss = 3.0722e-03, PNorm = 153.6052, GNorm = 0.2159, lr_0 = 1.8541e-04
Loss = 4.1280e-03, PNorm = 153.6079, GNorm = 0.1131, lr_0 = 1.8528e-04
Loss = 1.7660e-03, PNorm = 153.6100, GNorm = 0.1045, lr_0 = 1.8515e-04
Loss = 2.8478e-03, PNorm = 153.6145, GNorm = 0.0692, lr_0 = 1.8503e-04
Loss = 2.6603e-03, PNorm = 153.6185, GNorm = 0.2169, lr_0 = 1.8490e-04
Loss = 1.6161e-03, PNorm = 153.6203, GNorm = 0.0407, lr_0 = 1.8477e-04
Loss = 2.7619e-03, PNorm = 153.6250, GNorm = 0.1810, lr_0 = 1.8465e-04
Loss = 1.9218e-03, PNorm = 153.6291, GNorm = 0.3761, lr_0 = 1.8452e-04
Loss = 1.5405e-03, PNorm = 153.6315, GNorm = 0.0636, lr_0 = 1.8439e-04
Loss = 1.4813e-03, PNorm = 153.6326, GNorm = 0.0477, lr_0 = 1.8427e-04
Loss = 1.3377e-03, PNorm = 153.6370, GNorm = 0.1594, lr_0 = 1.8414e-04
Loss = 1.4537e-03, PNorm = 153.6404, GNorm = 0.0554, lr_0 = 1.8401e-04
Loss = 2.2492e-03, PNorm = 153.6452, GNorm = 0.0693, lr_0 = 1.8389e-04
Loss = 2.4301e-03, PNorm = 153.6503, GNorm = 0.1676, lr_0 = 1.8376e-04
Loss = 1.9111e-03, PNorm = 153.6568, GNorm = 0.2016, lr_0 = 1.8364e-04
Loss = 1.8488e-03, PNorm = 153.6591, GNorm = 0.1559, lr_0 = 1.8351e-04
Loss = 1.5575e-03, PNorm = 153.6608, GNorm = 0.0915, lr_0 = 1.8338e-04
Loss = 1.4623e-03, PNorm = 153.6636, GNorm = 0.0502, lr_0 = 1.8326e-04
Loss = 2.2196e-03, PNorm = 153.6666, GNorm = 0.0338, lr_0 = 1.8313e-04
Loss = 1.4025e-03, PNorm = 153.6691, GNorm = 0.0911, lr_0 = 1.8301e-04
Loss = 1.4051e-03, PNorm = 153.6716, GNorm = 0.1683, lr_0 = 1.8288e-04
Loss = 2.4482e-03, PNorm = 153.6752, GNorm = 0.2289, lr_0 = 1.8276e-04
Loss = 2.5304e-03, PNorm = 153.6768, GNorm = 0.1786, lr_0 = 1.8263e-04
Loss = 3.6451e-03, PNorm = 153.6800, GNorm = 0.0766, lr_0 = 1.8251e-04
Loss = 1.3400e-03, PNorm = 153.6837, GNorm = 0.2053, lr_0 = 1.8238e-04
Loss = 1.9809e-03, PNorm = 153.6880, GNorm = 0.1486, lr_0 = 1.8226e-04
Loss = 2.3878e-03, PNorm = 153.6936, GNorm = 0.0954, lr_0 = 1.8213e-04
Loss = 1.8779e-03, PNorm = 153.6991, GNorm = 0.0929, lr_0 = 1.8201e-04
Loss = 1.5637e-03, PNorm = 153.7061, GNorm = 0.0887, lr_0 = 1.8188e-04
Loss = 1.3924e-03, PNorm = 153.7085, GNorm = 0.1155, lr_0 = 1.8176e-04
Loss = 1.6359e-03, PNorm = 153.7108, GNorm = 0.1248, lr_0 = 1.8163e-04
Loss = 3.6746e-03, PNorm = 153.7120, GNorm = 0.1261, lr_0 = 1.8151e-04
Loss = 2.2145e-03, PNorm = 153.7160, GNorm = 0.0463, lr_0 = 1.8138e-04
Loss = 3.2551e-03, PNorm = 153.7206, GNorm = 0.3553, lr_0 = 1.8126e-04
Loss = 3.4792e-03, PNorm = 153.7250, GNorm = 0.1404, lr_0 = 1.8114e-04
Loss = 2.5138e-03, PNorm = 153.7289, GNorm = 0.1096, lr_0 = 1.8101e-04
Loss = 2.0055e-03, PNorm = 153.7315, GNorm = 0.0330, lr_0 = 1.8089e-04
Loss = 4.7965e-03, PNorm = 153.7358, GNorm = 0.0470, lr_0 = 1.8076e-04
Loss = 1.7819e-03, PNorm = 153.7396, GNorm = 0.1040, lr_0 = 1.8064e-04
Loss = 1.9443e-03, PNorm = 153.7427, GNorm = 0.1391, lr_0 = 1.8052e-04
Loss = 1.8032e-03, PNorm = 153.7472, GNorm = 0.1056, lr_0 = 1.8039e-04
Loss = 1.2788e-03, PNorm = 153.7526, GNorm = 0.1712, lr_0 = 1.8027e-04
Loss = 2.0538e-03, PNorm = 153.7578, GNorm = 0.4843, lr_0 = 1.8015e-04
Loss = 3.7054e-03, PNorm = 153.7600, GNorm = 0.5469, lr_0 = 1.8002e-04
Loss = 3.0077e-03, PNorm = 153.7625, GNorm = 0.0860, lr_0 = 1.7990e-04
Loss = 2.9628e-03, PNorm = 153.7660, GNorm = 0.1147, lr_0 = 1.7978e-04
Loss = 1.6254e-03, PNorm = 153.7724, GNorm = 0.2685, lr_0 = 1.7965e-04
Loss = 2.9046e-03, PNorm = 153.7771, GNorm = 0.1205, lr_0 = 1.7953e-04
Loss = 1.3603e-03, PNorm = 153.7812, GNorm = 0.0743, lr_0 = 1.7941e-04
Loss = 2.1598e-03, PNorm = 153.7838, GNorm = 0.0306, lr_0 = 1.7928e-04
Loss = 1.4712e-03, PNorm = 153.7851, GNorm = 0.1096, lr_0 = 1.7916e-04
Loss = 1.6514e-03, PNorm = 153.7888, GNorm = 0.1545, lr_0 = 1.7904e-04
Loss = 4.0600e-03, PNorm = 153.7921, GNorm = 0.0983, lr_0 = 1.7892e-04
Loss = 2.6170e-03, PNorm = 153.7975, GNorm = 0.0512, lr_0 = 1.7879e-04
Loss = 1.4843e-03, PNorm = 153.8003, GNorm = 0.1938, lr_0 = 1.7867e-04
Loss = 1.5595e-03, PNorm = 153.8020, GNorm = 0.0759, lr_0 = 1.7855e-04
Loss = 2.3977e-03, PNorm = 153.8046, GNorm = 0.1061, lr_0 = 1.7843e-04
Loss = 2.0672e-03, PNorm = 153.8109, GNorm = 0.1438, lr_0 = 1.7830e-04
Loss = 1.3409e-03, PNorm = 153.8152, GNorm = 0.1496, lr_0 = 1.7818e-04
Loss = 3.6453e-03, PNorm = 153.8199, GNorm = 0.1886, lr_0 = 1.7806e-04
Loss = 1.7799e-03, PNorm = 153.8247, GNorm = 0.0927, lr_0 = 1.7794e-04
Loss = 2.0636e-03, PNorm = 153.8267, GNorm = 0.0483, lr_0 = 1.7782e-04
Validation mae = 0.476063
Epoch 23
Loss = 1.4533e-03, PNorm = 153.8296, GNorm = 0.0840, lr_0 = 1.7769e-04
Loss = 1.3073e-03, PNorm = 153.8322, GNorm = 0.1338, lr_0 = 1.7757e-04
Loss = 3.1200e-03, PNorm = 153.8346, GNorm = 0.5386, lr_0 = 1.7745e-04
Loss = 1.2124e-03, PNorm = 153.8371, GNorm = 0.1332, lr_0 = 1.7733e-04
Loss = 1.3212e-03, PNorm = 153.8378, GNorm = 0.3004, lr_0 = 1.7721e-04
Loss = 1.2785e-03, PNorm = 153.8416, GNorm = 0.2637, lr_0 = 1.7709e-04
Loss = 1.2601e-03, PNorm = 153.8432, GNorm = 0.0591, lr_0 = 1.7696e-04
Loss = 2.0272e-03, PNorm = 153.8459, GNorm = 0.2531, lr_0 = 1.7684e-04
Loss = 9.9527e-04, PNorm = 153.8476, GNorm = 0.1363, lr_0 = 1.7672e-04
Loss = 2.5974e-03, PNorm = 153.8513, GNorm = 0.2277, lr_0 = 1.7660e-04
Loss = 1.5424e-03, PNorm = 153.8516, GNorm = 0.1798, lr_0 = 1.7648e-04
Loss = 1.7143e-03, PNorm = 153.8528, GNorm = 0.1792, lr_0 = 1.7636e-04
Loss = 1.7727e-03, PNorm = 153.8545, GNorm = 0.1256, lr_0 = 1.7624e-04
Loss = 1.1836e-03, PNorm = 153.8579, GNorm = 0.0524, lr_0 = 1.7612e-04
Loss = 1.4647e-03, PNorm = 153.8605, GNorm = 0.0309, lr_0 = 1.7600e-04
Loss = 1.7740e-03, PNorm = 153.8632, GNorm = 0.0378, lr_0 = 1.7588e-04
Loss = 1.4978e-03, PNorm = 153.8670, GNorm = 0.0585, lr_0 = 1.7576e-04
Loss = 1.1799e-03, PNorm = 153.8705, GNorm = 0.1312, lr_0 = 1.7564e-04
Loss = 1.3830e-03, PNorm = 153.8745, GNorm = 0.0922, lr_0 = 1.7552e-04
Loss = 1.2689e-03, PNorm = 153.8753, GNorm = 0.0876, lr_0 = 1.7540e-04
Loss = 2.0694e-03, PNorm = 153.8752, GNorm = 0.1098, lr_0 = 1.7528e-04
Loss = 1.4139e-03, PNorm = 153.8770, GNorm = 0.0296, lr_0 = 1.7516e-04
Loss = 1.0502e-03, PNorm = 153.8807, GNorm = 0.1799, lr_0 = 1.7504e-04
Loss = 1.5698e-03, PNorm = 153.8855, GNorm = 0.1285, lr_0 = 1.7492e-04
Loss = 1.4177e-03, PNorm = 153.8904, GNorm = 0.0872, lr_0 = 1.7480e-04
Loss = 1.5509e-03, PNorm = 153.8923, GNorm = 0.1483, lr_0 = 1.7468e-04
Loss = 1.0396e-03, PNorm = 153.8948, GNorm = 0.0857, lr_0 = 1.7456e-04
Loss = 1.1592e-03, PNorm = 153.8965, GNorm = 0.0928, lr_0 = 1.7444e-04
Loss = 1.8799e-03, PNorm = 153.8979, GNorm = 0.0634, lr_0 = 1.7432e-04
Loss = 1.4514e-03, PNorm = 153.9012, GNorm = 0.1674, lr_0 = 1.7420e-04
Loss = 1.9245e-03, PNorm = 153.9052, GNorm = 0.1051, lr_0 = 1.7408e-04
Loss = 1.8925e-03, PNorm = 153.9069, GNorm = 0.1746, lr_0 = 1.7396e-04
Loss = 2.5579e-03, PNorm = 153.9083, GNorm = 0.3245, lr_0 = 1.7384e-04
Loss = 2.0435e-03, PNorm = 153.9088, GNorm = 0.1098, lr_0 = 1.7372e-04
Loss = 1.2097e-03, PNorm = 153.9109, GNorm = 0.1309, lr_0 = 1.7360e-04
Loss = 1.7230e-03, PNorm = 153.9143, GNorm = 0.1096, lr_0 = 1.7348e-04
Loss = 1.5122e-03, PNorm = 153.9197, GNorm = 0.1574, lr_0 = 1.7336e-04
Loss = 3.1196e-03, PNorm = 153.9253, GNorm = 0.1097, lr_0 = 1.7325e-04
Loss = 1.7699e-03, PNorm = 153.9279, GNorm = 0.0428, lr_0 = 1.7313e-04
Loss = 5.3036e-03, PNorm = 153.9311, GNorm = 0.2193, lr_0 = 1.7301e-04
Loss = 1.4610e-03, PNorm = 153.9354, GNorm = 0.2022, lr_0 = 1.7289e-04
Loss = 1.3074e-03, PNorm = 153.9379, GNorm = 0.0768, lr_0 = 1.7277e-04
Loss = 1.4900e-03, PNorm = 153.9406, GNorm = 0.1128, lr_0 = 1.7265e-04
Loss = 2.6845e-03, PNorm = 153.9444, GNorm = 0.0875, lr_0 = 1.7253e-04
Loss = 2.3611e-03, PNorm = 153.9471, GNorm = 0.0845, lr_0 = 1.7242e-04
Loss = 1.6281e-03, PNorm = 153.9491, GNorm = 0.0495, lr_0 = 1.7230e-04
Loss = 1.3958e-03, PNorm = 153.9522, GNorm = 0.2245, lr_0 = 1.7218e-04
Loss = 2.5755e-03, PNorm = 153.9538, GNorm = 0.1521, lr_0 = 1.7206e-04
Loss = 2.0084e-03, PNorm = 153.9563, GNorm = 0.1162, lr_0 = 1.7194e-04
Loss = 1.9189e-03, PNorm = 153.9596, GNorm = 0.0456, lr_0 = 1.7183e-04
Loss = 2.5061e-03, PNorm = 153.9627, GNorm = 0.1330, lr_0 = 1.7171e-04
Loss = 1.9599e-03, PNorm = 153.9622, GNorm = 0.1026, lr_0 = 1.7159e-04
Loss = 9.8488e-04, PNorm = 153.9644, GNorm = 0.0412, lr_0 = 1.7147e-04
Loss = 1.2464e-03, PNorm = 153.9690, GNorm = 0.0571, lr_0 = 1.7136e-04
Loss = 1.3663e-03, PNorm = 153.9733, GNorm = 0.1002, lr_0 = 1.7124e-04
Loss = 2.4396e-03, PNorm = 153.9764, GNorm = 0.1552, lr_0 = 1.7112e-04
Loss = 1.7116e-03, PNorm = 153.9805, GNorm = 0.1695, lr_0 = 1.7100e-04
Loss = 4.8134e-03, PNorm = 153.9832, GNorm = 0.3040, lr_0 = 1.7089e-04
Loss = 1.5455e-03, PNorm = 153.9867, GNorm = 0.0459, lr_0 = 1.7077e-04
Loss = 1.5420e-03, PNorm = 153.9898, GNorm = 0.0418, lr_0 = 1.7065e-04
Loss = 1.5063e-03, PNorm = 153.9915, GNorm = 0.0382, lr_0 = 1.7054e-04
Loss = 3.8393e-03, PNorm = 153.9929, GNorm = 0.0778, lr_0 = 1.7042e-04
Loss = 1.3774e-03, PNorm = 153.9955, GNorm = 0.1618, lr_0 = 1.7030e-04
Loss = 2.1191e-03, PNorm = 153.9984, GNorm = 0.2332, lr_0 = 1.7019e-04
Loss = 1.9225e-03, PNorm = 154.0017, GNorm = 0.0880, lr_0 = 1.7007e-04
Loss = 1.3855e-03, PNorm = 154.0042, GNorm = 0.1401, lr_0 = 1.6995e-04
Loss = 1.5040e-03, PNorm = 154.0092, GNorm = 0.1147, lr_0 = 1.6984e-04
Loss = 3.6951e-03, PNorm = 154.0116, GNorm = 0.1611, lr_0 = 1.6972e-04
Loss = 2.0769e-03, PNorm = 154.0143, GNorm = 0.1027, lr_0 = 1.6960e-04
Loss = 2.6637e-03, PNorm = 154.0160, GNorm = 0.0946, lr_0 = 1.6949e-04
Loss = 1.6382e-03, PNorm = 154.0194, GNorm = 0.1666, lr_0 = 1.6937e-04
Loss = 3.2167e-03, PNorm = 154.0229, GNorm = 0.1249, lr_0 = 1.6926e-04
Loss = 1.4500e-03, PNorm = 154.0250, GNorm = 0.1042, lr_0 = 1.6914e-04
Loss = 2.2483e-03, PNorm = 154.0257, GNorm = 0.1507, lr_0 = 1.6902e-04
Loss = 1.3846e-03, PNorm = 154.0291, GNorm = 0.0738, lr_0 = 1.6891e-04
Loss = 1.6502e-03, PNorm = 154.0338, GNorm = 0.0575, lr_0 = 1.6879e-04
Loss = 1.4720e-03, PNorm = 154.0369, GNorm = 0.1114, lr_0 = 1.6868e-04
Loss = 2.7506e-03, PNorm = 154.0405, GNorm = 0.2193, lr_0 = 1.6856e-04
Loss = 1.2509e-03, PNorm = 154.0428, GNorm = 0.1507, lr_0 = 1.6845e-04
Loss = 3.1716e-03, PNorm = 154.0461, GNorm = 0.2351, lr_0 = 1.6833e-04
Loss = 2.1407e-03, PNorm = 154.0512, GNorm = 0.1215, lr_0 = 1.6821e-04
Loss = 3.3267e-03, PNorm = 154.0561, GNorm = 0.1440, lr_0 = 1.6810e-04
Loss = 1.5339e-03, PNorm = 154.0571, GNorm = 0.0969, lr_0 = 1.6798e-04
Loss = 3.4369e-03, PNorm = 154.0570, GNorm = 0.1899, lr_0 = 1.6787e-04
Loss = 9.6615e-04, PNorm = 154.0587, GNorm = 0.0805, lr_0 = 1.6775e-04
Loss = 1.0633e-03, PNorm = 154.0611, GNorm = 0.0719, lr_0 = 1.6764e-04
Loss = 1.2353e-03, PNorm = 154.0636, GNorm = 0.0863, lr_0 = 1.6752e-04
Loss = 1.3088e-03, PNorm = 154.0669, GNorm = 0.1550, lr_0 = 1.6741e-04
Loss = 3.0341e-03, PNorm = 154.0708, GNorm = 0.1167, lr_0 = 1.6729e-04
Loss = 1.2365e-03, PNorm = 154.0727, GNorm = 0.1620, lr_0 = 1.6718e-04
Loss = 2.8682e-03, PNorm = 154.0763, GNorm = 0.3982, lr_0 = 1.6707e-04
Loss = 1.4657e-03, PNorm = 154.0809, GNorm = 0.1904, lr_0 = 1.6695e-04
Loss = 1.8193e-03, PNorm = 154.0847, GNorm = 0.0525, lr_0 = 1.6684e-04
Loss = 1.0005e-03, PNorm = 154.0884, GNorm = 0.0668, lr_0 = 1.6672e-04
Loss = 1.0142e-03, PNorm = 154.0930, GNorm = 0.1409, lr_0 = 1.6661e-04
Loss = 1.8784e-03, PNorm = 154.0934, GNorm = 0.1391, lr_0 = 1.6649e-04
Loss = 2.1419e-03, PNorm = 154.0959, GNorm = 0.2231, lr_0 = 1.6638e-04
Loss = 1.2632e-03, PNorm = 154.0994, GNorm = 0.0667, lr_0 = 1.6627e-04
Loss = 1.2719e-03, PNorm = 154.1013, GNorm = 0.1169, lr_0 = 1.6615e-04
Loss = 2.9322e-03, PNorm = 154.1035, GNorm = 0.1150, lr_0 = 1.6604e-04
Loss = 2.6843e-03, PNorm = 154.1047, GNorm = 0.3816, lr_0 = 1.6592e-04
Loss = 1.1113e-03, PNorm = 154.1073, GNorm = 0.1776, lr_0 = 1.6581e-04
Loss = 2.1458e-03, PNorm = 154.1101, GNorm = 0.1205, lr_0 = 1.6570e-04
Loss = 2.4458e-03, PNorm = 154.1123, GNorm = 0.3507, lr_0 = 1.6558e-04
Loss = 1.5175e-03, PNorm = 154.1146, GNorm = 0.0749, lr_0 = 1.6547e-04
Loss = 2.3010e-03, PNorm = 154.1155, GNorm = 0.0677, lr_0 = 1.6536e-04
Loss = 3.0057e-03, PNorm = 154.1165, GNorm = 0.1288, lr_0 = 1.6524e-04
Loss = 1.7226e-03, PNorm = 154.1189, GNorm = 0.0835, lr_0 = 1.6513e-04
Loss = 1.3535e-03, PNorm = 154.1201, GNorm = 0.2199, lr_0 = 1.6502e-04
Loss = 1.3874e-03, PNorm = 154.1232, GNorm = 0.1142, lr_0 = 1.6490e-04
Loss = 1.1185e-03, PNorm = 154.1257, GNorm = 0.1385, lr_0 = 1.6479e-04
Loss = 1.1622e-03, PNorm = 154.1284, GNorm = 0.0977, lr_0 = 1.6468e-04
Loss = 3.2028e-03, PNorm = 154.1296, GNorm = 0.1986, lr_0 = 1.6457e-04
Loss = 1.4222e-03, PNorm = 154.1317, GNorm = 0.0983, lr_0 = 1.6445e-04
Loss = 2.1730e-03, PNorm = 154.1371, GNorm = 0.0946, lr_0 = 1.6434e-04
Loss = 1.3433e-03, PNorm = 154.1403, GNorm = 0.0537, lr_0 = 1.6423e-04
Loss = 3.0220e-03, PNorm = 154.1426, GNorm = 0.0995, lr_0 = 1.6412e-04
Loss = 1.2985e-03, PNorm = 154.1469, GNorm = 0.1220, lr_0 = 1.6400e-04
Loss = 1.7358e-03, PNorm = 154.1507, GNorm = 0.0680, lr_0 = 1.6389e-04
Loss = 1.5884e-03, PNorm = 154.1545, GNorm = 0.1869, lr_0 = 1.6378e-04
Validation mae = 0.475683
Epoch 24
Loss = 2.1277e-03, PNorm = 154.1570, GNorm = 0.0706, lr_0 = 1.6367e-04
Loss = 1.4389e-03, PNorm = 154.1587, GNorm = 0.1204, lr_0 = 1.6355e-04
Loss = 1.1524e-03, PNorm = 154.1607, GNorm = 0.0469, lr_0 = 1.6344e-04
Loss = 1.0066e-03, PNorm = 154.1617, GNorm = 0.0938, lr_0 = 1.6333e-04
Loss = 1.7484e-03, PNorm = 154.1622, GNorm = 0.0636, lr_0 = 1.6322e-04
Loss = 1.1410e-03, PNorm = 154.1626, GNorm = 0.0661, lr_0 = 1.6311e-04
Loss = 1.1299e-03, PNorm = 154.1643, GNorm = 0.0515, lr_0 = 1.6299e-04
Loss = 1.6739e-03, PNorm = 154.1652, GNorm = 0.0967, lr_0 = 1.6288e-04
Loss = 1.8354e-03, PNorm = 154.1665, GNorm = 0.0940, lr_0 = 1.6277e-04
Loss = 1.5859e-03, PNorm = 154.1694, GNorm = 0.0793, lr_0 = 1.6266e-04
Loss = 9.3050e-04, PNorm = 154.1708, GNorm = 0.1475, lr_0 = 1.6255e-04
Loss = 5.8290e-03, PNorm = 154.1728, GNorm = 0.1111, lr_0 = 1.6244e-04
Loss = 9.1772e-04, PNorm = 154.1749, GNorm = 0.0828, lr_0 = 1.6233e-04
Loss = 1.7152e-03, PNorm = 154.1773, GNorm = 0.1028, lr_0 = 1.6221e-04
Loss = 1.0269e-03, PNorm = 154.1803, GNorm = 0.1065, lr_0 = 1.6210e-04
Loss = 1.6193e-03, PNorm = 154.1823, GNorm = 0.2347, lr_0 = 1.6199e-04
Loss = 2.4418e-03, PNorm = 154.1841, GNorm = 0.0583, lr_0 = 1.6188e-04
Loss = 1.2512e-03, PNorm = 154.1889, GNorm = 0.1141, lr_0 = 1.6177e-04
Loss = 1.7086e-03, PNorm = 154.1909, GNorm = 0.0938, lr_0 = 1.6166e-04
Loss = 9.4690e-04, PNorm = 154.1938, GNorm = 0.0515, lr_0 = 1.6155e-04
Loss = 1.1156e-03, PNorm = 154.1960, GNorm = 0.1117, lr_0 = 1.6144e-04
Loss = 7.8869e-04, PNorm = 154.1985, GNorm = 0.0811, lr_0 = 1.6133e-04
Loss = 1.5097e-03, PNorm = 154.2000, GNorm = 0.0553, lr_0 = 1.6122e-04
Loss = 1.0362e-03, PNorm = 154.2024, GNorm = 0.1424, lr_0 = 1.6111e-04
Loss = 2.7186e-03, PNorm = 154.2053, GNorm = 0.1672, lr_0 = 1.6100e-04
Loss = 1.4682e-03, PNorm = 154.2088, GNorm = 0.2644, lr_0 = 1.6089e-04
Loss = 1.8294e-03, PNorm = 154.2106, GNorm = 0.1089, lr_0 = 1.6078e-04
Loss = 1.1658e-03, PNorm = 154.2135, GNorm = 0.1136, lr_0 = 1.6067e-04
Loss = 1.3394e-03, PNorm = 154.2154, GNorm = 0.1071, lr_0 = 1.6056e-04
Loss = 2.2391e-03, PNorm = 154.2171, GNorm = 0.0936, lr_0 = 1.6045e-04
Loss = 2.4084e-03, PNorm = 154.2215, GNorm = 0.0613, lr_0 = 1.6034e-04
Loss = 1.3222e-03, PNorm = 154.2234, GNorm = 0.0944, lr_0 = 1.6023e-04
Loss = 3.2723e-03, PNorm = 154.2245, GNorm = 0.1561, lr_0 = 1.6012e-04
Loss = 1.1031e-03, PNorm = 154.2271, GNorm = 0.0498, lr_0 = 1.6001e-04
Loss = 1.0455e-03, PNorm = 154.2292, GNorm = 0.1636, lr_0 = 1.5990e-04
Loss = 1.5518e-03, PNorm = 154.2316, GNorm = 0.0633, lr_0 = 1.5979e-04
Loss = 9.5138e-04, PNorm = 154.2346, GNorm = 0.1314, lr_0 = 1.5968e-04
Loss = 2.4018e-03, PNorm = 154.2353, GNorm = 0.1437, lr_0 = 1.5957e-04
Loss = 1.8947e-03, PNorm = 154.2383, GNorm = 0.0910, lr_0 = 1.5946e-04
Loss = 9.8267e-04, PNorm = 154.2422, GNorm = 0.1992, lr_0 = 1.5935e-04
Loss = 1.7157e-03, PNorm = 154.2454, GNorm = 0.3551, lr_0 = 1.5924e-04
Loss = 3.0712e-03, PNorm = 154.2477, GNorm = 0.3137, lr_0 = 1.5913e-04
Loss = 1.1134e-03, PNorm = 154.2494, GNorm = 0.1256, lr_0 = 1.5902e-04
Loss = 1.3597e-03, PNorm = 154.2505, GNorm = 0.0535, lr_0 = 1.5891e-04
Loss = 1.7419e-03, PNorm = 154.2537, GNorm = 0.1379, lr_0 = 1.5880e-04
Loss = 1.3898e-03, PNorm = 154.2551, GNorm = 0.1988, lr_0 = 1.5870e-04
Loss = 9.3367e-04, PNorm = 154.2590, GNorm = 0.0947, lr_0 = 1.5859e-04
Loss = 2.6539e-03, PNorm = 154.2620, GNorm = 0.2452, lr_0 = 1.5848e-04
Loss = 1.9446e-03, PNorm = 154.2657, GNorm = 0.0723, lr_0 = 1.5837e-04
Loss = 2.3410e-03, PNorm = 154.2680, GNorm = 0.3299, lr_0 = 1.5826e-04
Loss = 1.5576e-03, PNorm = 154.2699, GNorm = 0.1912, lr_0 = 1.5815e-04
Loss = 1.8030e-03, PNorm = 154.2714, GNorm = 0.1546, lr_0 = 1.5804e-04
Loss = 1.8748e-03, PNorm = 154.2759, GNorm = 0.1117, lr_0 = 1.5794e-04
Loss = 1.4415e-03, PNorm = 154.2792, GNorm = 0.0256, lr_0 = 1.5783e-04
Loss = 1.9985e-03, PNorm = 154.2814, GNorm = 0.0312, lr_0 = 1.5772e-04
Loss = 2.2092e-03, PNorm = 154.2842, GNorm = 0.1382, lr_0 = 1.5761e-04
Loss = 2.2145e-03, PNorm = 154.2874, GNorm = 0.0650, lr_0 = 1.5750e-04
Loss = 1.6313e-03, PNorm = 154.2933, GNorm = 0.2071, lr_0 = 1.5740e-04
Loss = 9.9764e-04, PNorm = 154.2976, GNorm = 0.0486, lr_0 = 1.5729e-04
Loss = 1.1267e-03, PNorm = 154.3003, GNorm = 0.0629, lr_0 = 1.5718e-04
Loss = 9.7478e-04, PNorm = 154.3015, GNorm = 0.2048, lr_0 = 1.5707e-04
Loss = 9.6485e-04, PNorm = 154.3051, GNorm = 0.1836, lr_0 = 1.5697e-04
Loss = 8.1751e-04, PNorm = 154.3065, GNorm = 0.1004, lr_0 = 1.5686e-04
Loss = 1.0346e-03, PNorm = 154.3087, GNorm = 0.0823, lr_0 = 1.5675e-04
Loss = 1.5984e-03, PNorm = 154.3114, GNorm = 0.1098, lr_0 = 1.5664e-04
Loss = 2.3915e-03, PNorm = 154.3129, GNorm = 0.1183, lr_0 = 1.5654e-04
Loss = 2.1930e-03, PNorm = 154.3172, GNorm = 0.1947, lr_0 = 1.5643e-04
Loss = 1.4909e-03, PNorm = 154.3208, GNorm = 0.0994, lr_0 = 1.5632e-04
Loss = 9.9106e-04, PNorm = 154.3249, GNorm = 0.0500, lr_0 = 1.5621e-04
Loss = 2.7556e-03, PNorm = 154.3259, GNorm = 0.0666, lr_0 = 1.5611e-04
Loss = 1.6798e-03, PNorm = 154.3288, GNorm = 0.1127, lr_0 = 1.5600e-04
Loss = 1.2756e-03, PNorm = 154.3312, GNorm = 0.0472, lr_0 = 1.5589e-04
Loss = 8.4895e-04, PNorm = 154.3345, GNorm = 0.0253, lr_0 = 1.5579e-04
Loss = 3.2621e-03, PNorm = 154.3380, GNorm = 0.1632, lr_0 = 1.5568e-04
Loss = 1.1987e-03, PNorm = 154.3386, GNorm = 0.0778, lr_0 = 1.5557e-04
Loss = 2.9516e-03, PNorm = 154.3419, GNorm = 0.0843, lr_0 = 1.5547e-04
Loss = 1.8986e-03, PNorm = 154.3434, GNorm = 0.2171, lr_0 = 1.5536e-04
Loss = 1.5455e-03, PNorm = 154.3463, GNorm = 0.0404, lr_0 = 1.5525e-04
Loss = 1.0640e-03, PNorm = 154.3474, GNorm = 0.0922, lr_0 = 1.5515e-04
Loss = 2.6754e-03, PNorm = 154.3502, GNorm = 0.1498, lr_0 = 1.5504e-04
Loss = 1.1618e-03, PNorm = 154.3502, GNorm = 0.1040, lr_0 = 1.5493e-04
Loss = 1.9363e-03, PNorm = 154.3534, GNorm = 0.0810, lr_0 = 1.5483e-04
Loss = 1.2202e-03, PNorm = 154.3552, GNorm = 0.1262, lr_0 = 1.5472e-04
Loss = 1.3183e-03, PNorm = 154.3588, GNorm = 0.0922, lr_0 = 1.5462e-04
Loss = 1.1777e-03, PNorm = 154.3613, GNorm = 0.0403, lr_0 = 1.5451e-04
Loss = 2.3410e-03, PNorm = 154.3628, GNorm = 0.1233, lr_0 = 1.5440e-04
Loss = 1.9299e-03, PNorm = 154.3656, GNorm = 0.0997, lr_0 = 1.5430e-04
Loss = 1.0750e-03, PNorm = 154.3691, GNorm = 0.1433, lr_0 = 1.5419e-04
Loss = 1.4561e-03, PNorm = 154.3700, GNorm = 0.0368, lr_0 = 1.5409e-04
Loss = 2.5536e-03, PNorm = 154.3730, GNorm = 0.0747, lr_0 = 1.5398e-04
Loss = 2.0007e-03, PNorm = 154.3764, GNorm = 0.1373, lr_0 = 1.5388e-04
Loss = 1.1605e-03, PNorm = 154.3788, GNorm = 0.0421, lr_0 = 1.5377e-04
Loss = 1.6446e-03, PNorm = 154.3807, GNorm = 0.0441, lr_0 = 1.5367e-04
Loss = 1.3676e-03, PNorm = 154.3843, GNorm = 0.1654, lr_0 = 1.5356e-04
Loss = 1.8724e-03, PNorm = 154.3870, GNorm = 0.1607, lr_0 = 1.5346e-04
Loss = 1.0087e-03, PNorm = 154.3898, GNorm = 0.1119, lr_0 = 1.5335e-04
Loss = 1.2492e-03, PNorm = 154.3916, GNorm = 0.2592, lr_0 = 1.5325e-04
Loss = 1.3822e-03, PNorm = 154.3929, GNorm = 0.0500, lr_0 = 1.5314e-04
Loss = 1.5517e-03, PNorm = 154.3936, GNorm = 0.1750, lr_0 = 1.5304e-04
Loss = 1.1398e-03, PNorm = 154.3956, GNorm = 0.0468, lr_0 = 1.5293e-04
Loss = 1.2566e-03, PNorm = 154.3973, GNorm = 0.0520, lr_0 = 1.5283e-04
Loss = 1.9557e-03, PNorm = 154.4020, GNorm = 0.1001, lr_0 = 1.5272e-04
Loss = 1.8331e-03, PNorm = 154.4069, GNorm = 0.1698, lr_0 = 1.5262e-04
Loss = 1.4945e-03, PNorm = 154.4104, GNorm = 0.0705, lr_0 = 1.5251e-04
Loss = 1.8771e-03, PNorm = 154.4113, GNorm = 0.1590, lr_0 = 1.5241e-04
Loss = 1.9145e-03, PNorm = 154.4136, GNorm = 0.1944, lr_0 = 1.5230e-04
Loss = 1.9885e-03, PNorm = 154.4143, GNorm = 0.0796, lr_0 = 1.5220e-04
Loss = 2.8465e-03, PNorm = 154.4157, GNorm = 0.0785, lr_0 = 1.5209e-04
Loss = 2.9464e-03, PNorm = 154.4194, GNorm = 0.1287, lr_0 = 1.5199e-04
Loss = 2.2587e-03, PNorm = 154.4207, GNorm = 0.0905, lr_0 = 1.5189e-04
Loss = 1.0971e-03, PNorm = 154.4235, GNorm = 0.0378, lr_0 = 1.5178e-04
Loss = 2.3144e-03, PNorm = 154.4242, GNorm = 0.2837, lr_0 = 1.5168e-04
Loss = 2.3165e-03, PNorm = 154.4270, GNorm = 0.2226, lr_0 = 1.5157e-04
Loss = 9.9116e-04, PNorm = 154.4286, GNorm = 0.0950, lr_0 = 1.5147e-04
Loss = 1.8778e-03, PNorm = 154.4303, GNorm = 0.5700, lr_0 = 1.5137e-04
Loss = 1.8432e-03, PNorm = 154.4311, GNorm = 0.0884, lr_0 = 1.5126e-04
Loss = 1.5401e-03, PNorm = 154.4335, GNorm = 0.2224, lr_0 = 1.5116e-04
Loss = 1.0589e-03, PNorm = 154.4364, GNorm = 0.0567, lr_0 = 1.5106e-04
Loss = 1.7127e-03, PNorm = 154.4391, GNorm = 0.0620, lr_0 = 1.5095e-04
Loss = 1.9033e-03, PNorm = 154.4408, GNorm = 0.1275, lr_0 = 1.5085e-04
Validation mae = 0.475943
Epoch 25
Loss = 1.2987e-03, PNorm = 154.4413, GNorm = 0.0755, lr_0 = 1.5075e-04
Loss = 1.4328e-03, PNorm = 154.4407, GNorm = 0.1044, lr_0 = 1.5064e-04
Loss = 9.6425e-04, PNorm = 154.4418, GNorm = 0.0444, lr_0 = 1.5054e-04
Loss = 1.4869e-03, PNorm = 154.4434, GNorm = 0.1042, lr_0 = 1.5044e-04
Loss = 1.8211e-03, PNorm = 154.4451, GNorm = 0.0855, lr_0 = 1.5033e-04
Loss = 8.3697e-04, PNorm = 154.4458, GNorm = 0.1033, lr_0 = 1.5023e-04
Loss = 1.6329e-03, PNorm = 154.4471, GNorm = 0.1110, lr_0 = 1.5013e-04
Loss = 1.2157e-03, PNorm = 154.4489, GNorm = 0.1241, lr_0 = 1.5002e-04
Loss = 7.7807e-04, PNorm = 154.4504, GNorm = 0.0560, lr_0 = 1.4992e-04
Loss = 1.2702e-03, PNorm = 154.4520, GNorm = 0.0705, lr_0 = 1.4982e-04
Loss = 1.5542e-03, PNorm = 154.4527, GNorm = 0.1041, lr_0 = 1.4972e-04
Loss = 1.6595e-03, PNorm = 154.4550, GNorm = 0.0273, lr_0 = 1.4961e-04
Loss = 1.4604e-03, PNorm = 154.4581, GNorm = 0.0524, lr_0 = 1.4951e-04
Loss = 1.3133e-03, PNorm = 154.4606, GNorm = 0.0297, lr_0 = 1.4941e-04
Loss = 2.7507e-03, PNorm = 154.4601, GNorm = 0.0305, lr_0 = 1.4931e-04
Loss = 1.2008e-03, PNorm = 154.4623, GNorm = 0.1061, lr_0 = 1.4920e-04
Loss = 1.2119e-03, PNorm = 154.4649, GNorm = 0.1470, lr_0 = 1.4910e-04
Loss = 1.0810e-03, PNorm = 154.4672, GNorm = 0.0768, lr_0 = 1.4900e-04
Loss = 1.1983e-03, PNorm = 154.4672, GNorm = 0.0400, lr_0 = 1.4890e-04
Loss = 7.1001e-04, PNorm = 154.4688, GNorm = 0.0773, lr_0 = 1.4880e-04
Loss = 8.3295e-04, PNorm = 154.4718, GNorm = 0.0700, lr_0 = 1.4869e-04
Loss = 1.2929e-03, PNorm = 154.4742, GNorm = 0.0618, lr_0 = 1.4859e-04
Loss = 1.1842e-03, PNorm = 154.4773, GNorm = 0.1695, lr_0 = 1.4849e-04
Loss = 1.4970e-03, PNorm = 154.4795, GNorm = 0.0922, lr_0 = 1.4839e-04
Loss = 1.8050e-03, PNorm = 154.4822, GNorm = 0.0680, lr_0 = 1.4829e-04
Loss = 8.7975e-04, PNorm = 154.4852, GNorm = 0.1177, lr_0 = 1.4818e-04
Loss = 8.9799e-04, PNorm = 154.4870, GNorm = 0.1387, lr_0 = 1.4808e-04
Loss = 9.9714e-04, PNorm = 154.4873, GNorm = 0.2442, lr_0 = 1.4798e-04
Loss = 3.3694e-03, PNorm = 154.4903, GNorm = 0.5941, lr_0 = 1.4788e-04
Loss = 1.1698e-03, PNorm = 154.4938, GNorm = 0.1154, lr_0 = 1.4778e-04
Loss = 9.5207e-04, PNorm = 154.4973, GNorm = 0.1852, lr_0 = 1.4768e-04
Loss = 1.4113e-03, PNorm = 154.5007, GNorm = 0.1539, lr_0 = 1.4758e-04
Loss = 1.3111e-03, PNorm = 154.5033, GNorm = 0.0585, lr_0 = 1.4748e-04
Loss = 1.0765e-03, PNorm = 154.5054, GNorm = 0.0469, lr_0 = 1.4737e-04
Loss = 2.7218e-03, PNorm = 154.5080, GNorm = 0.2149, lr_0 = 1.4727e-04
Loss = 8.3506e-04, PNorm = 154.5119, GNorm = 0.0745, lr_0 = 1.4717e-04
Loss = 1.1082e-03, PNorm = 154.5155, GNorm = 0.1432, lr_0 = 1.4707e-04
Loss = 9.0933e-04, PNorm = 154.5168, GNorm = 0.1587, lr_0 = 1.4697e-04
Loss = 2.0307e-03, PNorm = 154.5183, GNorm = 0.0844, lr_0 = 1.4687e-04
Loss = 8.9055e-04, PNorm = 154.5195, GNorm = 0.1074, lr_0 = 1.4677e-04
Loss = 2.1877e-03, PNorm = 154.5204, GNorm = 0.0546, lr_0 = 1.4667e-04
Loss = 9.4280e-04, PNorm = 154.5212, GNorm = 0.1048, lr_0 = 1.4657e-04
Loss = 2.4537e-03, PNorm = 154.5240, GNorm = 0.1162, lr_0 = 1.4647e-04
Loss = 2.1462e-03, PNorm = 154.5258, GNorm = 0.3090, lr_0 = 1.4637e-04
Loss = 1.2644e-03, PNorm = 154.5272, GNorm = 0.0517, lr_0 = 1.4627e-04
Loss = 9.3927e-04, PNorm = 154.5283, GNorm = 0.1034, lr_0 = 1.4617e-04
Loss = 1.2052e-03, PNorm = 154.5304, GNorm = 0.0858, lr_0 = 1.4607e-04
Loss = 1.7859e-03, PNorm = 154.5323, GNorm = 0.1126, lr_0 = 1.4597e-04
Loss = 9.1146e-04, PNorm = 154.5346, GNorm = 0.1150, lr_0 = 1.4587e-04
Loss = 1.2716e-03, PNorm = 154.5342, GNorm = 0.1066, lr_0 = 1.4577e-04
Loss = 2.0973e-03, PNorm = 154.5361, GNorm = 0.0647, lr_0 = 1.4567e-04
Loss = 4.1069e-03, PNorm = 154.5377, GNorm = 0.2227, lr_0 = 1.4557e-04
Loss = 8.7223e-04, PNorm = 154.5403, GNorm = 0.1811, lr_0 = 1.4547e-04
Loss = 2.1505e-03, PNorm = 154.5416, GNorm = 0.1202, lr_0 = 1.4537e-04
Loss = 1.3182e-03, PNorm = 154.5445, GNorm = 0.0986, lr_0 = 1.4527e-04
Loss = 2.4959e-03, PNorm = 154.5481, GNorm = 0.1001, lr_0 = 1.4517e-04
Loss = 1.2583e-03, PNorm = 154.5517, GNorm = 0.0798, lr_0 = 1.4507e-04
Loss = 7.3894e-04, PNorm = 154.5519, GNorm = 0.0669, lr_0 = 1.4497e-04
Loss = 7.6447e-04, PNorm = 154.5532, GNorm = 0.0775, lr_0 = 1.4487e-04
Loss = 1.7969e-03, PNorm = 154.5522, GNorm = 0.1068, lr_0 = 1.4477e-04
Loss = 1.2468e-03, PNorm = 154.5535, GNorm = 0.1212, lr_0 = 1.4467e-04
Loss = 7.8349e-04, PNorm = 154.5572, GNorm = 0.0693, lr_0 = 1.4457e-04
Loss = 8.1301e-04, PNorm = 154.5595, GNorm = 0.0417, lr_0 = 1.4447e-04
Loss = 8.5714e-04, PNorm = 154.5611, GNorm = 0.1279, lr_0 = 1.4438e-04
Loss = 1.0936e-03, PNorm = 154.5637, GNorm = 0.0987, lr_0 = 1.4428e-04
Loss = 1.4383e-03, PNorm = 154.5663, GNorm = 0.1163, lr_0 = 1.4418e-04
Loss = 1.6705e-03, PNorm = 154.5690, GNorm = 0.0755, lr_0 = 1.4408e-04
Loss = 1.0621e-03, PNorm = 154.5724, GNorm = 0.0398, lr_0 = 1.4398e-04
Loss = 1.3401e-03, PNorm = 154.5745, GNorm = 0.0500, lr_0 = 1.4388e-04
Loss = 2.9129e-03, PNorm = 154.5759, GNorm = 0.2660, lr_0 = 1.4378e-04
Loss = 1.5286e-03, PNorm = 154.5766, GNorm = 0.1849, lr_0 = 1.4368e-04
Loss = 9.1468e-04, PNorm = 154.5795, GNorm = 0.0477, lr_0 = 1.4359e-04
Loss = 1.4804e-03, PNorm = 154.5804, GNorm = 0.1252, lr_0 = 1.4349e-04
Loss = 1.3502e-03, PNorm = 154.5813, GNorm = 0.0257, lr_0 = 1.4339e-04
Loss = 2.3078e-03, PNorm = 154.5813, GNorm = 0.1729, lr_0 = 1.4329e-04
Loss = 8.9892e-04, PNorm = 154.5840, GNorm = 0.0879, lr_0 = 1.4319e-04
Loss = 1.7244e-03, PNorm = 154.5860, GNorm = 0.2226, lr_0 = 1.4310e-04
Loss = 1.1030e-03, PNorm = 154.5901, GNorm = 0.1236, lr_0 = 1.4300e-04
Loss = 2.7163e-03, PNorm = 154.5925, GNorm = 0.1648, lr_0 = 1.4290e-04
Loss = 8.6618e-04, PNorm = 154.5945, GNorm = 0.1128, lr_0 = 1.4280e-04
Loss = 1.6567e-03, PNorm = 154.5959, GNorm = 0.1715, lr_0 = 1.4270e-04
Loss = 2.0959e-03, PNorm = 154.5981, GNorm = 0.2859, lr_0 = 1.4261e-04
Loss = 2.9097e-03, PNorm = 154.6017, GNorm = 0.0583, lr_0 = 1.4251e-04
Loss = 3.0750e-03, PNorm = 154.6033, GNorm = 0.0691, lr_0 = 1.4241e-04
Loss = 1.0333e-03, PNorm = 154.6053, GNorm = 0.1217, lr_0 = 1.4231e-04
Loss = 2.6062e-03, PNorm = 154.6079, GNorm = 0.4791, lr_0 = 1.4222e-04
Loss = 9.2089e-04, PNorm = 154.6093, GNorm = 0.0443, lr_0 = 1.4212e-04
Loss = 4.9104e-03, PNorm = 154.6115, GNorm = 0.0832, lr_0 = 1.4202e-04
Loss = 2.5444e-03, PNorm = 154.6166, GNorm = 0.1680, lr_0 = 1.4192e-04
Loss = 1.6944e-03, PNorm = 154.6206, GNorm = 0.0925, lr_0 = 1.4183e-04
Loss = 8.1258e-04, PNorm = 154.6228, GNorm = 0.0788, lr_0 = 1.4173e-04
Loss = 9.3734e-04, PNorm = 154.6257, GNorm = 0.0822, lr_0 = 1.4163e-04
Loss = 1.9192e-03, PNorm = 154.6273, GNorm = 0.0678, lr_0 = 1.4153e-04
Loss = 1.4054e-03, PNorm = 154.6289, GNorm = 0.3893, lr_0 = 1.4144e-04
Loss = 8.9281e-04, PNorm = 154.6305, GNorm = 0.0928, lr_0 = 1.4134e-04
Loss = 2.2952e-03, PNorm = 154.6315, GNorm = 0.4184, lr_0 = 1.4124e-04
Loss = 2.9823e-03, PNorm = 154.6328, GNorm = 0.0518, lr_0 = 1.4115e-04
Loss = 2.4791e-03, PNorm = 154.6345, GNorm = 0.0657, lr_0 = 1.4105e-04
Loss = 8.2727e-04, PNorm = 154.6394, GNorm = 0.2336, lr_0 = 1.4095e-04
Loss = 1.3268e-03, PNorm = 154.6433, GNorm = 0.0999, lr_0 = 1.4086e-04
Loss = 1.0517e-03, PNorm = 154.6444, GNorm = 0.1934, lr_0 = 1.4076e-04
Loss = 2.2968e-03, PNorm = 154.6456, GNorm = 0.1603, lr_0 = 1.4066e-04
Loss = 1.0220e-03, PNorm = 154.6467, GNorm = 0.1277, lr_0 = 1.4057e-04
Loss = 2.1524e-03, PNorm = 154.6501, GNorm = 0.1576, lr_0 = 1.4047e-04
Loss = 1.3828e-03, PNorm = 154.6535, GNorm = 0.1651, lr_0 = 1.4038e-04
Loss = 1.2304e-03, PNorm = 154.6546, GNorm = 0.0270, lr_0 = 1.4028e-04
Loss = 1.0406e-03, PNorm = 154.6575, GNorm = 0.1769, lr_0 = 1.4018e-04
Loss = 1.0476e-03, PNorm = 154.6604, GNorm = 0.1956, lr_0 = 1.4009e-04
Loss = 1.5577e-03, PNorm = 154.6621, GNorm = 0.1396, lr_0 = 1.3999e-04
Loss = 8.0890e-04, PNorm = 154.6637, GNorm = 0.0795, lr_0 = 1.3990e-04
Loss = 1.8205e-03, PNorm = 154.6640, GNorm = 0.2550, lr_0 = 1.3980e-04
Loss = 1.0565e-03, PNorm = 154.6647, GNorm = 0.1204, lr_0 = 1.3970e-04
Loss = 1.9697e-03, PNorm = 154.6670, GNorm = 0.1195, lr_0 = 1.3961e-04
Loss = 1.8633e-03, PNorm = 154.6686, GNorm = 0.0460, lr_0 = 1.3951e-04
Loss = 9.8452e-04, PNorm = 154.6704, GNorm = 0.0605, lr_0 = 1.3942e-04
Loss = 1.4058e-03, PNorm = 154.6727, GNorm = 0.0891, lr_0 = 1.3932e-04
Loss = 6.9493e-04, PNorm = 154.6740, GNorm = 0.1034, lr_0 = 1.3923e-04
Loss = 1.0518e-03, PNorm = 154.6757, GNorm = 0.1197, lr_0 = 1.3913e-04
Loss = 1.9648e-03, PNorm = 154.6773, GNorm = 0.2577, lr_0 = 1.3904e-04
Loss = 1.1105e-03, PNorm = 154.6796, GNorm = 0.1870, lr_0 = 1.3894e-04
Validation mae = 0.476130
Epoch 26
Loss = 1.2024e-03, PNorm = 154.6833, GNorm = 0.1000, lr_0 = 1.3884e-04
Loss = 7.6619e-04, PNorm = 154.6842, GNorm = 0.0328, lr_0 = 1.3875e-04
Loss = 3.8231e-03, PNorm = 154.6852, GNorm = 0.1584, lr_0 = 1.3865e-04
Loss = 1.9642e-03, PNorm = 154.6861, GNorm = 0.2244, lr_0 = 1.3856e-04
Loss = 1.0785e-03, PNorm = 154.6872, GNorm = 0.0320, lr_0 = 1.3846e-04
Loss = 2.4876e-03, PNorm = 154.6887, GNorm = 0.0608, lr_0 = 1.3837e-04
Loss = 1.7969e-03, PNorm = 154.6890, GNorm = 0.0456, lr_0 = 1.3828e-04
Loss = 9.9989e-04, PNorm = 154.6884, GNorm = 0.1250, lr_0 = 1.3818e-04
Loss = 7.9626e-04, PNorm = 154.6904, GNorm = 0.0613, lr_0 = 1.3809e-04
Loss = 1.6341e-03, PNorm = 154.6933, GNorm = 0.0989, lr_0 = 1.3799e-04
Loss = 6.6472e-04, PNorm = 154.6967, GNorm = 0.0826, lr_0 = 1.3790e-04
Loss = 1.3002e-03, PNorm = 154.6972, GNorm = 0.1598, lr_0 = 1.3780e-04
Loss = 7.5534e-04, PNorm = 154.6979, GNorm = 0.1013, lr_0 = 1.3771e-04
Loss = 1.5830e-03, PNorm = 154.7005, GNorm = 0.0841, lr_0 = 1.3761e-04
Loss = 1.1173e-03, PNorm = 154.7037, GNorm = 0.0557, lr_0 = 1.3752e-04
Loss = 2.0019e-03, PNorm = 154.7047, GNorm = 0.1152, lr_0 = 1.3742e-04
Loss = 9.9880e-04, PNorm = 154.7075, GNorm = 0.1218, lr_0 = 1.3733e-04
Loss = 7.9812e-04, PNorm = 154.7092, GNorm = 0.0594, lr_0 = 1.3724e-04
Loss = 1.0634e-03, PNorm = 154.7096, GNorm = 0.0944, lr_0 = 1.3714e-04
Loss = 1.1249e-03, PNorm = 154.7097, GNorm = 0.0595, lr_0 = 1.3705e-04
Loss = 7.5868e-04, PNorm = 154.7100, GNorm = 0.1505, lr_0 = 1.3695e-04
Loss = 1.0542e-03, PNorm = 154.7096, GNorm = 0.0857, lr_0 = 1.3686e-04
Loss = 9.8679e-04, PNorm = 154.7089, GNorm = 0.1119, lr_0 = 1.3677e-04
Loss = 2.4539e-03, PNorm = 154.7121, GNorm = 0.1188, lr_0 = 1.3667e-04
Loss = 9.1227e-04, PNorm = 154.7136, GNorm = 0.1034, lr_0 = 1.3658e-04
Loss = 1.1721e-03, PNorm = 154.7167, GNorm = 0.0437, lr_0 = 1.3649e-04
Loss = 1.2719e-03, PNorm = 154.7201, GNorm = 0.1443, lr_0 = 1.3639e-04
Loss = 9.0434e-04, PNorm = 154.7232, GNorm = 0.1039, lr_0 = 1.3630e-04
Loss = 1.3935e-03, PNorm = 154.7256, GNorm = 0.1045, lr_0 = 1.3621e-04
Loss = 1.5734e-03, PNorm = 154.7266, GNorm = 0.1419, lr_0 = 1.3611e-04
Loss = 7.8073e-04, PNorm = 154.7280, GNorm = 0.1636, lr_0 = 1.3602e-04
Loss = 1.5374e-03, PNorm = 154.7287, GNorm = 0.0713, lr_0 = 1.3593e-04
Loss = 9.0177e-04, PNorm = 154.7301, GNorm = 0.1334, lr_0 = 1.3583e-04
Loss = 9.4284e-04, PNorm = 154.7323, GNorm = 0.1010, lr_0 = 1.3574e-04
Loss = 1.3647e-03, PNorm = 154.7338, GNorm = 0.0837, lr_0 = 1.3565e-04
Loss = 1.5521e-03, PNorm = 154.7365, GNorm = 0.1706, lr_0 = 1.3555e-04
Loss = 6.9139e-04, PNorm = 154.7372, GNorm = 0.0608, lr_0 = 1.3546e-04
Loss = 2.0026e-03, PNorm = 154.7406, GNorm = 0.1031, lr_0 = 1.3537e-04
Loss = 6.7943e-04, PNorm = 154.7431, GNorm = 0.0471, lr_0 = 1.3528e-04
Loss = 1.3597e-03, PNorm = 154.7462, GNorm = 0.1046, lr_0 = 1.3518e-04
Loss = 1.6521e-03, PNorm = 154.7473, GNorm = 0.3502, lr_0 = 1.3509e-04
Loss = 1.9311e-03, PNorm = 154.7489, GNorm = 0.1836, lr_0 = 1.3500e-04
Loss = 2.1803e-03, PNorm = 154.7496, GNorm = 0.1393, lr_0 = 1.3491e-04
Loss = 1.2814e-03, PNorm = 154.7508, GNorm = 0.0864, lr_0 = 1.3481e-04
Loss = 1.2766e-03, PNorm = 154.7538, GNorm = 0.0653, lr_0 = 1.3472e-04
Loss = 1.9189e-03, PNorm = 154.7550, GNorm = 0.1257, lr_0 = 1.3463e-04
Loss = 8.4656e-04, PNorm = 154.7576, GNorm = 0.0440, lr_0 = 1.3454e-04
Loss = 9.8662e-04, PNorm = 154.7572, GNorm = 0.0512, lr_0 = 1.3444e-04
Loss = 1.0965e-03, PNorm = 154.7579, GNorm = 0.1209, lr_0 = 1.3435e-04
Loss = 1.5419e-03, PNorm = 154.7604, GNorm = 0.1786, lr_0 = 1.3426e-04
Loss = 8.4775e-04, PNorm = 154.7623, GNorm = 0.0818, lr_0 = 1.3417e-04
Loss = 8.3024e-04, PNorm = 154.7648, GNorm = 0.0540, lr_0 = 1.3408e-04
Loss = 1.1327e-03, PNorm = 154.7679, GNorm = 0.0690, lr_0 = 1.3398e-04
Loss = 1.3095e-03, PNorm = 154.7698, GNorm = 0.0532, lr_0 = 1.3389e-04
Loss = 1.4972e-03, PNorm = 154.7701, GNorm = 0.1564, lr_0 = 1.3380e-04
Loss = 1.9272e-03, PNorm = 154.7705, GNorm = 0.1942, lr_0 = 1.3371e-04
Loss = 7.8290e-04, PNorm = 154.7716, GNorm = 0.1270, lr_0 = 1.3362e-04
Loss = 8.7608e-04, PNorm = 154.7734, GNorm = 0.0788, lr_0 = 1.3353e-04
Loss = 8.8678e-04, PNorm = 154.7748, GNorm = 0.0561, lr_0 = 1.3343e-04
Loss = 7.5375e-04, PNorm = 154.7767, GNorm = 0.0797, lr_0 = 1.3334e-04
Loss = 8.0794e-04, PNorm = 154.7775, GNorm = 0.0872, lr_0 = 1.3325e-04
Loss = 7.5177e-04, PNorm = 154.7771, GNorm = 0.0329, lr_0 = 1.3316e-04
Loss = 1.3158e-03, PNorm = 154.7787, GNorm = 0.0864, lr_0 = 1.3307e-04
Loss = 2.1757e-03, PNorm = 154.7797, GNorm = 0.0794, lr_0 = 1.3298e-04
Loss = 1.7884e-03, PNorm = 154.7824, GNorm = 0.0931, lr_0 = 1.3289e-04
Loss = 1.7565e-03, PNorm = 154.7859, GNorm = 0.1348, lr_0 = 1.3280e-04
Loss = 1.2647e-03, PNorm = 154.7886, GNorm = 0.3214, lr_0 = 1.3270e-04
Loss = 1.8932e-03, PNorm = 154.7910, GNorm = 0.1522, lr_0 = 1.3261e-04
Loss = 1.2705e-03, PNorm = 154.7913, GNorm = 0.1186, lr_0 = 1.3252e-04
Loss = 6.6530e-04, PNorm = 154.7922, GNorm = 0.0643, lr_0 = 1.3243e-04
Loss = 9.0964e-04, PNorm = 154.7935, GNorm = 0.0744, lr_0 = 1.3234e-04
Loss = 1.2510e-03, PNorm = 154.7951, GNorm = 0.1059, lr_0 = 1.3225e-04
Loss = 1.5194e-03, PNorm = 154.7976, GNorm = 0.0903, lr_0 = 1.3216e-04
Loss = 3.6204e-03, PNorm = 154.7980, GNorm = 0.1574, lr_0 = 1.3207e-04
Loss = 6.8315e-04, PNorm = 154.8001, GNorm = 0.1434, lr_0 = 1.3198e-04
Loss = 1.0857e-03, PNorm = 154.8012, GNorm = 0.1131, lr_0 = 1.3189e-04
Loss = 1.5339e-03, PNorm = 154.8030, GNorm = 0.0616, lr_0 = 1.3180e-04
Loss = 7.2397e-04, PNorm = 154.8050, GNorm = 0.0901, lr_0 = 1.3171e-04
Loss = 1.0549e-03, PNorm = 154.8069, GNorm = 0.1164, lr_0 = 1.3162e-04
Loss = 1.4022e-03, PNorm = 154.8081, GNorm = 0.0373, lr_0 = 1.3153e-04
Loss = 1.1101e-03, PNorm = 154.8102, GNorm = 0.1225, lr_0 = 1.3144e-04
Loss = 9.3914e-04, PNorm = 154.8132, GNorm = 0.0460, lr_0 = 1.3135e-04
Loss = 1.1326e-03, PNorm = 154.8146, GNorm = 0.1087, lr_0 = 1.3126e-04
Loss = 1.1196e-03, PNorm = 154.8164, GNorm = 0.1518, lr_0 = 1.3117e-04
Loss = 2.2538e-03, PNorm = 154.8184, GNorm = 0.2106, lr_0 = 1.3108e-04
Loss = 7.0858e-04, PNorm = 154.8200, GNorm = 0.0780, lr_0 = 1.3099e-04
Loss = 6.7981e-04, PNorm = 154.8213, GNorm = 0.0519, lr_0 = 1.3090e-04
Loss = 1.8340e-03, PNorm = 154.8233, GNorm = 0.0748, lr_0 = 1.3081e-04
Loss = 3.1402e-03, PNorm = 154.8264, GNorm = 0.1223, lr_0 = 1.3072e-04
Loss = 1.8040e-03, PNorm = 154.8300, GNorm = 0.0395, lr_0 = 1.3063e-04
Loss = 9.8746e-04, PNorm = 154.8330, GNorm = 0.0508, lr_0 = 1.3054e-04
Loss = 2.3124e-03, PNorm = 154.8334, GNorm = 0.1021, lr_0 = 1.3045e-04
Loss = 7.2454e-04, PNorm = 154.8346, GNorm = 0.1331, lr_0 = 1.3036e-04
Loss = 1.4511e-03, PNorm = 154.8347, GNorm = 0.1290, lr_0 = 1.3027e-04
Loss = 1.7383e-03, PNorm = 154.8369, GNorm = 0.0310, lr_0 = 1.3018e-04
Loss = 9.7935e-04, PNorm = 154.8387, GNorm = 0.1331, lr_0 = 1.3009e-04
Loss = 1.7089e-03, PNorm = 154.8406, GNorm = 0.0631, lr_0 = 1.3000e-04
Loss = 1.4051e-03, PNorm = 154.8430, GNorm = 0.0937, lr_0 = 1.2992e-04
Loss = 6.7716e-04, PNorm = 154.8452, GNorm = 0.1103, lr_0 = 1.2983e-04
Loss = 1.1185e-03, PNorm = 154.8483, GNorm = 0.0473, lr_0 = 1.2974e-04
Loss = 7.1750e-04, PNorm = 154.8504, GNorm = 0.2338, lr_0 = 1.2965e-04
Loss = 4.2349e-03, PNorm = 154.8520, GNorm = 0.3467, lr_0 = 1.2956e-04
Loss = 3.4921e-03, PNorm = 154.8511, GNorm = 0.1140, lr_0 = 1.2947e-04
Loss = 2.9931e-03, PNorm = 154.8530, GNorm = 0.0984, lr_0 = 1.2938e-04
Loss = 1.0414e-03, PNorm = 154.8546, GNorm = 0.0760, lr_0 = 1.2929e-04
Loss = 2.3101e-03, PNorm = 154.8570, GNorm = 0.0410, lr_0 = 1.2921e-04
Loss = 3.2007e-03, PNorm = 154.8587, GNorm = 0.0364, lr_0 = 1.2912e-04
Loss = 6.6009e-04, PNorm = 154.8622, GNorm = 0.2041, lr_0 = 1.2903e-04
Loss = 1.6383e-03, PNorm = 154.8651, GNorm = 0.0497, lr_0 = 1.2894e-04
Loss = 1.4707e-03, PNorm = 154.8661, GNorm = 0.0947, lr_0 = 1.2885e-04
Loss = 1.2046e-03, PNorm = 154.8669, GNorm = 0.1390, lr_0 = 1.2876e-04
Loss = 3.4319e-03, PNorm = 154.8679, GNorm = 0.1046, lr_0 = 1.2867e-04
Loss = 1.1880e-03, PNorm = 154.8689, GNorm = 0.0681, lr_0 = 1.2859e-04
Loss = 7.7456e-04, PNorm = 154.8716, GNorm = 0.0752, lr_0 = 1.2850e-04
Loss = 1.1760e-03, PNorm = 154.8764, GNorm = 0.1734, lr_0 = 1.2841e-04
Loss = 1.3585e-03, PNorm = 154.8776, GNorm = 0.0532, lr_0 = 1.2832e-04
Loss = 1.8891e-03, PNorm = 154.8798, GNorm = 0.1179, lr_0 = 1.2823e-04
Loss = 6.3587e-04, PNorm = 154.8815, GNorm = 0.1336, lr_0 = 1.2815e-04
Loss = 2.0466e-03, PNorm = 154.8820, GNorm = 0.0794, lr_0 = 1.2806e-04
Loss = 8.9030e-04, PNorm = 154.8836, GNorm = 0.0551, lr_0 = 1.2797e-04
Validation mae = 0.475918
Epoch 27
Loss = 5.5727e-04, PNorm = 154.8852, GNorm = 0.0202, lr_0 = 1.2788e-04
Loss = 7.1113e-04, PNorm = 154.8855, GNorm = 0.1658, lr_0 = 1.2780e-04
Loss = 7.7948e-04, PNorm = 154.8868, GNorm = 0.1309, lr_0 = 1.2771e-04
Loss = 7.5289e-04, PNorm = 154.8862, GNorm = 0.1675, lr_0 = 1.2762e-04
Loss = 1.1769e-03, PNorm = 154.8870, GNorm = 0.0440, lr_0 = 1.2753e-04
Loss = 1.0296e-03, PNorm = 154.8876, GNorm = 0.1637, lr_0 = 1.2745e-04
Loss = 1.0669e-03, PNorm = 154.8897, GNorm = 0.0352, lr_0 = 1.2736e-04
Loss = 6.5524e-04, PNorm = 154.8896, GNorm = 0.0904, lr_0 = 1.2727e-04
Loss = 1.2241e-03, PNorm = 154.8927, GNorm = 0.0291, lr_0 = 1.2718e-04
Loss = 1.9476e-03, PNorm = 154.8947, GNorm = 0.0790, lr_0 = 1.2710e-04
Loss = 9.6906e-04, PNorm = 154.8961, GNorm = 0.0928, lr_0 = 1.2701e-04
Loss = 9.5590e-04, PNorm = 154.8977, GNorm = 0.0363, lr_0 = 1.2692e-04
Loss = 1.1055e-03, PNorm = 154.9000, GNorm = 0.0709, lr_0 = 1.2684e-04
Loss = 1.1388e-03, PNorm = 154.9003, GNorm = 0.0834, lr_0 = 1.2675e-04
Loss = 2.7723e-03, PNorm = 154.9008, GNorm = 0.1228, lr_0 = 1.2666e-04
Loss = 3.0187e-03, PNorm = 154.9003, GNorm = 0.0775, lr_0 = 1.2658e-04
Loss = 7.0108e-04, PNorm = 154.8998, GNorm = 0.0910, lr_0 = 1.2649e-04
Loss = 5.8547e-04, PNorm = 154.9021, GNorm = 0.0846, lr_0 = 1.2640e-04
Loss = 2.9877e-03, PNorm = 154.9028, GNorm = 0.0445, lr_0 = 1.2632e-04
Loss = 1.0720e-03, PNorm = 154.9034, GNorm = 0.0496, lr_0 = 1.2623e-04
Loss = 1.2198e-03, PNorm = 154.9043, GNorm = 0.0846, lr_0 = 1.2614e-04
Loss = 8.9928e-04, PNorm = 154.9050, GNorm = 0.0952, lr_0 = 1.2606e-04
Loss = 2.7786e-03, PNorm = 154.9072, GNorm = 0.1741, lr_0 = 1.2597e-04
Loss = 1.9281e-03, PNorm = 154.9084, GNorm = 0.1698, lr_0 = 1.2588e-04
Loss = 1.7646e-03, PNorm = 154.9115, GNorm = 0.0651, lr_0 = 1.2580e-04
Loss = 4.1416e-03, PNorm = 154.9145, GNorm = 0.0454, lr_0 = 1.2571e-04
Loss = 1.6820e-03, PNorm = 154.9168, GNorm = 0.2137, lr_0 = 1.2563e-04
Loss = 1.3098e-03, PNorm = 154.9177, GNorm = 0.1244, lr_0 = 1.2554e-04
Loss = 8.8072e-04, PNorm = 154.9186, GNorm = 0.2221, lr_0 = 1.2545e-04
Loss = 1.2155e-03, PNorm = 154.9208, GNorm = 0.0794, lr_0 = 1.2537e-04
Loss = 1.0428e-03, PNorm = 154.9198, GNorm = 0.1973, lr_0 = 1.2528e-04
Loss = 1.3924e-03, PNorm = 154.9208, GNorm = 0.0702, lr_0 = 1.2520e-04
Loss = 9.3377e-04, PNorm = 154.9226, GNorm = 0.1273, lr_0 = 1.2511e-04
Loss = 5.9784e-04, PNorm = 154.9253, GNorm = 0.0769, lr_0 = 1.2502e-04
Loss = 9.2892e-04, PNorm = 154.9285, GNorm = 0.1973, lr_0 = 1.2494e-04
Loss = 3.5027e-03, PNorm = 154.9305, GNorm = 0.1284, lr_0 = 1.2485e-04
Loss = 2.1447e-03, PNorm = 154.9324, GNorm = 0.0493, lr_0 = 1.2477e-04
Loss = 1.3950e-03, PNorm = 154.9327, GNorm = 0.3031, lr_0 = 1.2468e-04
Loss = 6.7682e-04, PNorm = 154.9327, GNorm = 0.0986, lr_0 = 1.2460e-04
Loss = 1.5433e-03, PNorm = 154.9343, GNorm = 0.0384, lr_0 = 1.2451e-04
Loss = 6.6647e-04, PNorm = 154.9356, GNorm = 0.1371, lr_0 = 1.2443e-04
Loss = 1.0250e-03, PNorm = 154.9380, GNorm = 0.0584, lr_0 = 1.2434e-04
Loss = 1.0272e-03, PNorm = 154.9411, GNorm = 0.0881, lr_0 = 1.2426e-04
Loss = 1.2847e-03, PNorm = 154.9436, GNorm = 0.0632, lr_0 = 1.2417e-04
Loss = 1.0898e-03, PNorm = 154.9447, GNorm = 0.0693, lr_0 = 1.2409e-04
Loss = 8.3937e-04, PNorm = 154.9463, GNorm = 0.0400, lr_0 = 1.2400e-04
Loss = 7.4803e-04, PNorm = 154.9476, GNorm = 0.0844, lr_0 = 1.2392e-04
Loss = 6.5263e-04, PNorm = 154.9490, GNorm = 0.0575, lr_0 = 1.2383e-04
Loss = 5.5190e-04, PNorm = 154.9521, GNorm = 0.0258, lr_0 = 1.2375e-04
Loss = 1.1855e-03, PNorm = 154.9545, GNorm = 0.0509, lr_0 = 1.2366e-04
Loss = 5.7383e-04, PNorm = 154.9554, GNorm = 0.0535, lr_0 = 1.2358e-04
Loss = 1.1257e-03, PNorm = 154.9567, GNorm = 0.1875, lr_0 = 1.2349e-04
Loss = 1.4154e-03, PNorm = 154.9570, GNorm = 0.1889, lr_0 = 1.2341e-04
Loss = 6.6370e-04, PNorm = 154.9602, GNorm = 0.0478, lr_0 = 1.2332e-04
Loss = 1.1060e-03, PNorm = 154.9618, GNorm = 0.0773, lr_0 = 1.2324e-04
Loss = 6.1891e-04, PNorm = 154.9630, GNorm = 0.0491, lr_0 = 1.2315e-04
Loss = 1.3250e-03, PNorm = 154.9641, GNorm = 0.0386, lr_0 = 1.2307e-04
Loss = 8.7014e-04, PNorm = 154.9652, GNorm = 0.1049, lr_0 = 1.2298e-04
Loss = 8.6820e-04, PNorm = 154.9660, GNorm = 0.0909, lr_0 = 1.2290e-04
Loss = 6.1398e-04, PNorm = 154.9681, GNorm = 0.0820, lr_0 = 1.2282e-04
Loss = 9.5839e-04, PNorm = 154.9681, GNorm = 0.2523, lr_0 = 1.2273e-04
Loss = 8.3860e-04, PNorm = 154.9696, GNorm = 0.1032, lr_0 = 1.2265e-04
Loss = 2.0878e-03, PNorm = 154.9699, GNorm = 0.0964, lr_0 = 1.2256e-04
Loss = 1.6455e-03, PNorm = 154.9695, GNorm = 0.0891, lr_0 = 1.2248e-04
Loss = 1.1929e-03, PNorm = 154.9707, GNorm = 0.1438, lr_0 = 1.2240e-04
Loss = 6.4357e-04, PNorm = 154.9728, GNorm = 0.0656, lr_0 = 1.2231e-04
Loss = 1.0938e-03, PNorm = 154.9748, GNorm = 0.0814, lr_0 = 1.2223e-04
Loss = 9.9896e-04, PNorm = 154.9758, GNorm = 0.0826, lr_0 = 1.2214e-04
Loss = 1.1199e-03, PNorm = 154.9769, GNorm = 0.0787, lr_0 = 1.2206e-04
Loss = 5.6323e-04, PNorm = 154.9784, GNorm = 0.0391, lr_0 = 1.2198e-04
Loss = 1.8389e-03, PNorm = 154.9793, GNorm = 0.0833, lr_0 = 1.2189e-04
Loss = 1.0651e-03, PNorm = 154.9801, GNorm = 0.1939, lr_0 = 1.2181e-04
Loss = 6.5842e-04, PNorm = 154.9809, GNorm = 0.0546, lr_0 = 1.2173e-04
Loss = 1.7493e-03, PNorm = 154.9830, GNorm = 0.1352, lr_0 = 1.2164e-04
Loss = 1.0116e-03, PNorm = 154.9824, GNorm = 0.0386, lr_0 = 1.2156e-04
Loss = 2.0244e-03, PNorm = 154.9839, GNorm = 0.0541, lr_0 = 1.2148e-04
Loss = 7.7631e-04, PNorm = 154.9848, GNorm = 0.1241, lr_0 = 1.2139e-04
Loss = 1.0698e-03, PNorm = 154.9870, GNorm = 0.0390, lr_0 = 1.2131e-04
Loss = 9.3837e-04, PNorm = 154.9876, GNorm = 0.0862, lr_0 = 1.2123e-04
Loss = 5.7283e-04, PNorm = 154.9883, GNorm = 0.0376, lr_0 = 1.2114e-04
Loss = 1.1170e-03, PNorm = 154.9894, GNorm = 0.0841, lr_0 = 1.2106e-04
Loss = 1.0952e-03, PNorm = 154.9913, GNorm = 0.0514, lr_0 = 1.2098e-04
Loss = 6.7425e-04, PNorm = 154.9940, GNorm = 0.0648, lr_0 = 1.2090e-04
Loss = 1.2691e-03, PNorm = 154.9959, GNorm = 0.0822, lr_0 = 1.2081e-04
Loss = 9.0201e-04, PNorm = 154.9966, GNorm = 0.0879, lr_0 = 1.2073e-04
Loss = 9.1795e-04, PNorm = 154.9964, GNorm = 0.1122, lr_0 = 1.2065e-04
Loss = 8.9194e-04, PNorm = 154.9987, GNorm = 0.1192, lr_0 = 1.2056e-04
Loss = 2.5073e-03, PNorm = 155.0010, GNorm = 0.1144, lr_0 = 1.2048e-04
Loss = 1.0200e-03, PNorm = 155.0049, GNorm = 0.0725, lr_0 = 1.2040e-04
Loss = 7.4778e-04, PNorm = 155.0076, GNorm = 0.1139, lr_0 = 1.2032e-04
Loss = 6.2322e-03, PNorm = 155.0084, GNorm = 0.3468, lr_0 = 1.2023e-04
Loss = 7.5268e-04, PNorm = 155.0128, GNorm = 0.1560, lr_0 = 1.2015e-04
Loss = 6.7070e-04, PNorm = 155.0156, GNorm = 0.1119, lr_0 = 1.2007e-04
Loss = 8.8900e-04, PNorm = 155.0167, GNorm = 0.1039, lr_0 = 1.1999e-04
Loss = 2.4770e-03, PNorm = 155.0185, GNorm = 0.0653, lr_0 = 1.1991e-04
Loss = 6.8556e-04, PNorm = 155.0194, GNorm = 0.0818, lr_0 = 1.1982e-04
Loss = 8.7152e-04, PNorm = 155.0212, GNorm = 0.3468, lr_0 = 1.1974e-04
Loss = 6.9723e-04, PNorm = 155.0215, GNorm = 0.1881, lr_0 = 1.1966e-04
Loss = 1.8632e-03, PNorm = 155.0231, GNorm = 0.1681, lr_0 = 1.1958e-04
Loss = 9.8318e-04, PNorm = 155.0226, GNorm = 0.1310, lr_0 = 1.1950e-04
Loss = 1.2840e-03, PNorm = 155.0241, GNorm = 0.0999, lr_0 = 1.1941e-04
Loss = 1.9159e-03, PNorm = 155.0243, GNorm = 0.0987, lr_0 = 1.1933e-04
Loss = 2.1710e-03, PNorm = 155.0268, GNorm = 0.1046, lr_0 = 1.1925e-04
Loss = 1.9182e-03, PNorm = 155.0301, GNorm = 0.0371, lr_0 = 1.1917e-04
Loss = 1.2894e-03, PNorm = 155.0331, GNorm = 0.0500, lr_0 = 1.1909e-04
Loss = 1.1221e-03, PNorm = 155.0344, GNorm = 0.0936, lr_0 = 1.1901e-04
Loss = 7.0926e-04, PNorm = 155.0354, GNorm = 0.1268, lr_0 = 1.1892e-04
Loss = 9.6345e-04, PNorm = 155.0358, GNorm = 0.0237, lr_0 = 1.1884e-04
Loss = 9.2036e-04, PNorm = 155.0368, GNorm = 0.1603, lr_0 = 1.1876e-04
Loss = 1.9971e-03, PNorm = 155.0386, GNorm = 0.0741, lr_0 = 1.1868e-04
Loss = 1.5956e-03, PNorm = 155.0396, GNorm = 0.1101, lr_0 = 1.1860e-04
Loss = 1.1780e-03, PNorm = 155.0408, GNorm = 0.1092, lr_0 = 1.1852e-04
Loss = 2.4187e-03, PNorm = 155.0437, GNorm = 0.2509, lr_0 = 1.1844e-04
Loss = 2.0049e-03, PNorm = 155.0459, GNorm = 0.1085, lr_0 = 1.1835e-04
Loss = 2.3653e-03, PNorm = 155.0483, GNorm = 0.0707, lr_0 = 1.1827e-04
Loss = 1.1802e-03, PNorm = 155.0485, GNorm = 0.2067, lr_0 = 1.1819e-04
Loss = 1.5925e-03, PNorm = 155.0503, GNorm = 0.0740, lr_0 = 1.1811e-04
Loss = 8.3642e-04, PNorm = 155.0530, GNorm = 0.0335, lr_0 = 1.1803e-04
Loss = 1.5232e-03, PNorm = 155.0555, GNorm = 0.3065, lr_0 = 1.1795e-04
Loss = 6.1778e-04, PNorm = 155.0558, GNorm = 0.0570, lr_0 = 1.1787e-04
Validation mae = 0.476217
Epoch 28
Loss = 7.1380e-04, PNorm = 155.0559, GNorm = 0.0768, lr_0 = 1.1779e-04
Loss = 9.0793e-04, PNorm = 155.0558, GNorm = 0.0421, lr_0 = 1.1771e-04
Loss = 8.2488e-04, PNorm = 155.0565, GNorm = 0.0222, lr_0 = 1.1763e-04
Loss = 1.2436e-03, PNorm = 155.0581, GNorm = 0.0653, lr_0 = 1.1755e-04
Loss = 7.2843e-04, PNorm = 155.0609, GNorm = 0.1126, lr_0 = 1.1747e-04
Loss = 4.5012e-04, PNorm = 155.0633, GNorm = 0.0934, lr_0 = 1.1739e-04
Loss = 2.3084e-03, PNorm = 155.0644, GNorm = 0.1446, lr_0 = 1.1730e-04
Loss = 5.1759e-04, PNorm = 155.0652, GNorm = 0.0604, lr_0 = 1.1722e-04
Loss = 5.1696e-04, PNorm = 155.0665, GNorm = 0.1070, lr_0 = 1.1714e-04
Loss = 1.5781e-03, PNorm = 155.0689, GNorm = 0.0957, lr_0 = 1.1706e-04
Loss = 3.6501e-03, PNorm = 155.0702, GNorm = 0.0453, lr_0 = 1.1698e-04
Loss = 1.2661e-03, PNorm = 155.0736, GNorm = 0.0434, lr_0 = 1.1690e-04
Loss = 1.8873e-03, PNorm = 155.0759, GNorm = 0.1945, lr_0 = 1.1682e-04
Loss = 5.2056e-04, PNorm = 155.0772, GNorm = 0.0784, lr_0 = 1.1674e-04
Loss = 7.0202e-04, PNorm = 155.0782, GNorm = 0.0729, lr_0 = 1.1666e-04
Loss = 4.9615e-04, PNorm = 155.0792, GNorm = 0.1351, lr_0 = 1.1658e-04
Loss = 1.3977e-03, PNorm = 155.0799, GNorm = 0.0524, lr_0 = 1.1650e-04
Loss = 6.3304e-04, PNorm = 155.0811, GNorm = 0.0578, lr_0 = 1.1642e-04
Loss = 9.0650e-04, PNorm = 155.0826, GNorm = 0.0358, lr_0 = 1.1634e-04
Loss = 7.1358e-04, PNorm = 155.0829, GNorm = 0.1070, lr_0 = 1.1626e-04
Loss = 1.3037e-03, PNorm = 155.0841, GNorm = 0.0400, lr_0 = 1.1618e-04
Loss = 5.2506e-04, PNorm = 155.0860, GNorm = 0.0591, lr_0 = 1.1611e-04
Loss = 1.3498e-03, PNorm = 155.0867, GNorm = 0.1698, lr_0 = 1.1603e-04
Loss = 4.6526e-04, PNorm = 155.0882, GNorm = 0.0749, lr_0 = 1.1595e-04
Loss = 1.4114e-03, PNorm = 155.0887, GNorm = 0.1275, lr_0 = 1.1587e-04
Loss = 7.1762e-04, PNorm = 155.0897, GNorm = 0.0685, lr_0 = 1.1579e-04
Loss = 6.3311e-04, PNorm = 155.0909, GNorm = 0.0512, lr_0 = 1.1571e-04
Loss = 6.9320e-04, PNorm = 155.0905, GNorm = 0.1152, lr_0 = 1.1563e-04
Loss = 1.4891e-03, PNorm = 155.0903, GNorm = 0.0217, lr_0 = 1.1555e-04
Loss = 2.7836e-03, PNorm = 155.0907, GNorm = 0.2214, lr_0 = 1.1547e-04
Loss = 5.2699e-04, PNorm = 155.0924, GNorm = 0.0757, lr_0 = 1.1539e-04
Loss = 7.9814e-04, PNorm = 155.0934, GNorm = 0.0318, lr_0 = 1.1531e-04
Loss = 1.1633e-03, PNorm = 155.0954, GNorm = 0.0250, lr_0 = 1.1523e-04
Loss = 1.5194e-03, PNorm = 155.0959, GNorm = 0.1352, lr_0 = 1.1515e-04
Loss = 5.3274e-04, PNorm = 155.0953, GNorm = 0.1096, lr_0 = 1.1508e-04
Loss = 4.7434e-04, PNorm = 155.0958, GNorm = 0.0642, lr_0 = 1.1500e-04
Loss = 5.4079e-04, PNorm = 155.0975, GNorm = 0.1322, lr_0 = 1.1492e-04
Loss = 5.8582e-04, PNorm = 155.0991, GNorm = 0.0568, lr_0 = 1.1484e-04
Loss = 4.7504e-04, PNorm = 155.1000, GNorm = 0.1112, lr_0 = 1.1476e-04
Loss = 1.7944e-03, PNorm = 155.1028, GNorm = 0.0310, lr_0 = 1.1468e-04
Loss = 7.8684e-04, PNorm = 155.1045, GNorm = 0.0724, lr_0 = 1.1460e-04
Loss = 6.5756e-04, PNorm = 155.1062, GNorm = 0.0610, lr_0 = 1.1452e-04
Loss = 1.5571e-03, PNorm = 155.1077, GNorm = 0.3353, lr_0 = 1.1445e-04
Loss = 7.5072e-04, PNorm = 155.1095, GNorm = 0.1314, lr_0 = 1.1437e-04
Loss = 4.7434e-04, PNorm = 155.1116, GNorm = 0.0825, lr_0 = 1.1429e-04
Loss = 7.8219e-04, PNorm = 155.1122, GNorm = 0.1069, lr_0 = 1.1421e-04
Loss = 7.5956e-04, PNorm = 155.1128, GNorm = 0.0601, lr_0 = 1.1413e-04
Loss = 1.3987e-03, PNorm = 155.1119, GNorm = 0.0802, lr_0 = 1.1405e-04
Loss = 1.2264e-03, PNorm = 155.1119, GNorm = 0.0824, lr_0 = 1.1398e-04
Loss = 1.4528e-03, PNorm = 155.1119, GNorm = 0.1090, lr_0 = 1.1390e-04
Loss = 8.1980e-04, PNorm = 155.1130, GNorm = 0.1423, lr_0 = 1.1382e-04
Loss = 4.8555e-03, PNorm = 155.1149, GNorm = 0.0705, lr_0 = 1.1374e-04
Loss = 8.3675e-04, PNorm = 155.1164, GNorm = 0.1495, lr_0 = 1.1366e-04
Loss = 1.1801e-03, PNorm = 155.1184, GNorm = 0.0548, lr_0 = 1.1359e-04
Loss = 9.9512e-04, PNorm = 155.1185, GNorm = 0.1436, lr_0 = 1.1351e-04
Loss = 1.6130e-03, PNorm = 155.1205, GNorm = 0.1285, lr_0 = 1.1343e-04
Loss = 2.2950e-03, PNorm = 155.1221, GNorm = 0.0354, lr_0 = 1.1335e-04
Loss = 5.5641e-04, PNorm = 155.1229, GNorm = 0.0759, lr_0 = 1.1328e-04
Loss = 5.8897e-04, PNorm = 155.1247, GNorm = 0.0438, lr_0 = 1.1320e-04
Loss = 1.5645e-03, PNorm = 155.1271, GNorm = 0.1331, lr_0 = 1.1312e-04
Loss = 4.7524e-04, PNorm = 155.1281, GNorm = 0.0698, lr_0 = 1.1304e-04
Loss = 2.0387e-03, PNorm = 155.1286, GNorm = 0.1252, lr_0 = 1.1297e-04
Loss = 9.3664e-04, PNorm = 155.1303, GNorm = 0.1058, lr_0 = 1.1289e-04
Loss = 7.6598e-04, PNorm = 155.1321, GNorm = 0.0486, lr_0 = 1.1281e-04
Loss = 5.1921e-04, PNorm = 155.1330, GNorm = 0.0580, lr_0 = 1.1273e-04
Loss = 7.8563e-04, PNorm = 155.1332, GNorm = 0.0438, lr_0 = 1.1266e-04
Loss = 1.9039e-03, PNorm = 155.1331, GNorm = 0.0281, lr_0 = 1.1258e-04
Loss = 1.2172e-03, PNorm = 155.1324, GNorm = 0.0902, lr_0 = 1.1250e-04
Loss = 5.5528e-04, PNorm = 155.1337, GNorm = 0.0531, lr_0 = 1.1243e-04
Loss = 7.6498e-04, PNorm = 155.1358, GNorm = 0.1004, lr_0 = 1.1235e-04
Loss = 9.0892e-04, PNorm = 155.1370, GNorm = 0.0430, lr_0 = 1.1227e-04
Loss = 7.9897e-04, PNorm = 155.1382, GNorm = 0.1285, lr_0 = 1.1219e-04
Loss = 1.1016e-03, PNorm = 155.1395, GNorm = 0.0875, lr_0 = 1.1212e-04
Loss = 1.9436e-03, PNorm = 155.1408, GNorm = 0.0396, lr_0 = 1.1204e-04
Loss = 7.1050e-04, PNorm = 155.1421, GNorm = 0.2279, lr_0 = 1.1196e-04
Loss = 5.0151e-04, PNorm = 155.1434, GNorm = 0.0940, lr_0 = 1.1189e-04
Loss = 1.1796e-03, PNorm = 155.1448, GNorm = 0.0429, lr_0 = 1.1181e-04
Loss = 3.1134e-03, PNorm = 155.1454, GNorm = 0.0396, lr_0 = 1.1173e-04
Loss = 1.5204e-03, PNorm = 155.1459, GNorm = 0.0759, lr_0 = 1.1166e-04
Loss = 1.1741e-03, PNorm = 155.1481, GNorm = 0.0315, lr_0 = 1.1158e-04
Loss = 1.0225e-03, PNorm = 155.1488, GNorm = 0.1932, lr_0 = 1.1150e-04
Loss = 1.5107e-03, PNorm = 155.1516, GNorm = 0.0999, lr_0 = 1.1143e-04
Loss = 9.6155e-04, PNorm = 155.1541, GNorm = 0.0619, lr_0 = 1.1135e-04
Loss = 1.5638e-03, PNorm = 155.1548, GNorm = 0.0779, lr_0 = 1.1128e-04
Loss = 1.4010e-03, PNorm = 155.1555, GNorm = 0.0569, lr_0 = 1.1120e-04
Loss = 7.4951e-04, PNorm = 155.1553, GNorm = 0.0523, lr_0 = 1.1112e-04
Loss = 7.0785e-04, PNorm = 155.1561, GNorm = 0.0593, lr_0 = 1.1105e-04
Loss = 1.0698e-03, PNorm = 155.1576, GNorm = 0.0713, lr_0 = 1.1097e-04
Loss = 6.5216e-04, PNorm = 155.1591, GNorm = 0.1502, lr_0 = 1.1089e-04
Loss = 1.5332e-03, PNorm = 155.1595, GNorm = 0.1976, lr_0 = 1.1082e-04
Loss = 5.8738e-04, PNorm = 155.1611, GNorm = 0.0473, lr_0 = 1.1074e-04
Loss = 1.1364e-03, PNorm = 155.1638, GNorm = 0.0218, lr_0 = 1.1067e-04
Loss = 5.2449e-04, PNorm = 155.1659, GNorm = 0.1389, lr_0 = 1.1059e-04
Loss = 8.7705e-04, PNorm = 155.1657, GNorm = 0.1343, lr_0 = 1.1052e-04
Loss = 9.1089e-04, PNorm = 155.1673, GNorm = 0.0493, lr_0 = 1.1044e-04
Loss = 2.7408e-03, PNorm = 155.1676, GNorm = 0.0525, lr_0 = 1.1036e-04
Loss = 7.3437e-04, PNorm = 155.1678, GNorm = 0.0467, lr_0 = 1.1029e-04
Loss = 5.2453e-04, PNorm = 155.1693, GNorm = 0.0979, lr_0 = 1.1021e-04
Loss = 6.5085e-04, PNorm = 155.1699, GNorm = 0.0412, lr_0 = 1.1014e-04
Loss = 1.2531e-03, PNorm = 155.1715, GNorm = 0.0365, lr_0 = 1.1006e-04
Loss = 5.7652e-04, PNorm = 155.1742, GNorm = 0.0709, lr_0 = 1.0999e-04
Loss = 1.0072e-03, PNorm = 155.1758, GNorm = 0.1160, lr_0 = 1.0991e-04
Loss = 7.8101e-04, PNorm = 155.1772, GNorm = 0.0674, lr_0 = 1.0984e-04
Loss = 8.3784e-04, PNorm = 155.1777, GNorm = 0.0857, lr_0 = 1.0976e-04
Loss = 1.0698e-03, PNorm = 155.1792, GNorm = 0.1263, lr_0 = 1.0969e-04
Loss = 2.5105e-03, PNorm = 155.1795, GNorm = 0.0419, lr_0 = 1.0961e-04
Loss = 1.8369e-03, PNorm = 155.1809, GNorm = 0.0520, lr_0 = 1.0954e-04
Loss = 1.8142e-03, PNorm = 155.1815, GNorm = 0.0457, lr_0 = 1.0946e-04
Loss = 2.6893e-03, PNorm = 155.1842, GNorm = 0.0937, lr_0 = 1.0939e-04
Loss = 4.8342e-04, PNorm = 155.1859, GNorm = 0.1045, lr_0 = 1.0931e-04
Loss = 1.6275e-03, PNorm = 155.1874, GNorm = 0.0744, lr_0 = 1.0924e-04
Loss = 8.1659e-04, PNorm = 155.1903, GNorm = 0.1462, lr_0 = 1.0916e-04
Loss = 4.7918e-04, PNorm = 155.1934, GNorm = 0.0825, lr_0 = 1.0909e-04
Loss = 2.1834e-03, PNorm = 155.1944, GNorm = 0.2163, lr_0 = 1.0901e-04
Loss = 3.3474e-03, PNorm = 155.1949, GNorm = 0.2961, lr_0 = 1.0894e-04
Loss = 1.2829e-03, PNorm = 155.1979, GNorm = 0.1060, lr_0 = 1.0886e-04
Loss = 6.4510e-04, PNorm = 155.2002, GNorm = 0.0877, lr_0 = 1.0879e-04
Loss = 1.6268e-03, PNorm = 155.2027, GNorm = 0.0626, lr_0 = 1.0871e-04
Loss = 2.0385e-03, PNorm = 155.2012, GNorm = 0.0996, lr_0 = 1.0864e-04
Loss = 1.7821e-03, PNorm = 155.2021, GNorm = 0.1040, lr_0 = 1.0856e-04
Validation mae = 0.475685
Epoch 29
Loss = 4.7483e-04, PNorm = 155.2019, GNorm = 0.0955, lr_0 = 1.0849e-04
Loss = 1.4409e-03, PNorm = 155.2038, GNorm = 0.0685, lr_0 = 1.0841e-04
Loss = 1.1136e-03, PNorm = 155.2050, GNorm = 0.0632, lr_0 = 1.0834e-04
Loss = 7.4136e-04, PNorm = 155.2079, GNorm = 0.0341, lr_0 = 1.0827e-04
Loss = 1.1748e-03, PNorm = 155.2097, GNorm = 0.0339, lr_0 = 1.0819e-04
Loss = 1.0631e-03, PNorm = 155.2088, GNorm = 0.0596, lr_0 = 1.0812e-04
Loss = 4.9169e-04, PNorm = 155.2091, GNorm = 0.0667, lr_0 = 1.0804e-04
Loss = 5.3053e-04, PNorm = 155.2097, GNorm = 0.0631, lr_0 = 1.0797e-04
Loss = 6.7785e-04, PNorm = 155.2092, GNorm = 0.1505, lr_0 = 1.0790e-04
Loss = 4.2047e-04, PNorm = 155.2109, GNorm = 0.1167, lr_0 = 1.0782e-04
Loss = 4.7260e-04, PNorm = 155.2113, GNorm = 0.0376, lr_0 = 1.0775e-04
Loss = 5.2490e-04, PNorm = 155.2124, GNorm = 0.0339, lr_0 = 1.0767e-04
Loss = 6.5004e-04, PNorm = 155.2132, GNorm = 0.0760, lr_0 = 1.0760e-04
Loss = 4.1654e-04, PNorm = 155.2139, GNorm = 0.0192, lr_0 = 1.0753e-04
Loss = 7.2186e-04, PNorm = 155.2155, GNorm = 0.1182, lr_0 = 1.0745e-04
Loss = 4.8643e-04, PNorm = 155.2164, GNorm = 0.0544, lr_0 = 1.0738e-04
Loss = 8.8989e-04, PNorm = 155.2164, GNorm = 0.1250, lr_0 = 1.0731e-04
Loss = 4.3548e-04, PNorm = 155.2174, GNorm = 0.0503, lr_0 = 1.0723e-04
Loss = 5.3784e-04, PNorm = 155.2186, GNorm = 0.0333, lr_0 = 1.0716e-04
Loss = 4.0548e-04, PNorm = 155.2202, GNorm = 0.0349, lr_0 = 1.0709e-04
Loss = 2.1386e-03, PNorm = 155.2214, GNorm = 0.1412, lr_0 = 1.0701e-04
Loss = 1.2112e-03, PNorm = 155.2208, GNorm = 0.1402, lr_0 = 1.0694e-04
Loss = 5.7226e-04, PNorm = 155.2218, GNorm = 0.0642, lr_0 = 1.0687e-04
Loss = 8.1530e-04, PNorm = 155.2216, GNorm = 0.0467, lr_0 = 1.0679e-04
Loss = 5.9691e-04, PNorm = 155.2228, GNorm = 0.0469, lr_0 = 1.0672e-04
Loss = 5.3023e-04, PNorm = 155.2233, GNorm = 0.0700, lr_0 = 1.0665e-04
Loss = 5.4095e-04, PNorm = 155.2251, GNorm = 0.1166, lr_0 = 1.0657e-04
Loss = 1.2120e-03, PNorm = 155.2263, GNorm = 0.1291, lr_0 = 1.0650e-04
Loss = 6.3564e-04, PNorm = 155.2271, GNorm = 0.0866, lr_0 = 1.0643e-04
Loss = 4.2756e-04, PNorm = 155.2273, GNorm = 0.0368, lr_0 = 1.0635e-04
Loss = 1.0899e-03, PNorm = 155.2286, GNorm = 0.1481, lr_0 = 1.0628e-04
Loss = 3.8516e-04, PNorm = 155.2294, GNorm = 0.0739, lr_0 = 1.0621e-04
Loss = 9.2771e-04, PNorm = 155.2300, GNorm = 0.0868, lr_0 = 1.0614e-04
Loss = 4.8529e-04, PNorm = 155.2318, GNorm = 0.0707, lr_0 = 1.0606e-04
Loss = 9.7250e-04, PNorm = 155.2324, GNorm = 0.0730, lr_0 = 1.0599e-04
Loss = 1.6423e-03, PNorm = 155.2336, GNorm = 0.0719, lr_0 = 1.0592e-04
Loss = 6.5481e-04, PNorm = 155.2338, GNorm = 0.1123, lr_0 = 1.0585e-04
Loss = 1.1485e-03, PNorm = 155.2364, GNorm = 0.0766, lr_0 = 1.0577e-04
Loss = 2.0683e-03, PNorm = 155.2395, GNorm = 0.0475, lr_0 = 1.0570e-04
Loss = 1.3634e-03, PNorm = 155.2418, GNorm = 0.1293, lr_0 = 1.0563e-04
Loss = 1.1039e-03, PNorm = 155.2426, GNorm = 0.0742, lr_0 = 1.0556e-04
Loss = 5.7533e-04, PNorm = 155.2449, GNorm = 0.1004, lr_0 = 1.0548e-04
Loss = 6.4872e-04, PNorm = 155.2464, GNorm = 0.0246, lr_0 = 1.0541e-04
Loss = 1.0712e-03, PNorm = 155.2462, GNorm = 0.0629, lr_0 = 1.0534e-04
Loss = 5.8586e-04, PNorm = 155.2478, GNorm = 0.0609, lr_0 = 1.0527e-04
Loss = 6.3994e-04, PNorm = 155.2481, GNorm = 0.0393, lr_0 = 1.0519e-04
Loss = 6.4274e-04, PNorm = 155.2482, GNorm = 0.0460, lr_0 = 1.0512e-04
Loss = 2.7047e-03, PNorm = 155.2495, GNorm = 0.3762, lr_0 = 1.0505e-04
Loss = 1.4364e-03, PNorm = 155.2514, GNorm = 0.1123, lr_0 = 1.0498e-04
Loss = 9.6538e-04, PNorm = 155.2528, GNorm = 0.1616, lr_0 = 1.0491e-04
Loss = 8.3697e-04, PNorm = 155.2529, GNorm = 0.0868, lr_0 = 1.0483e-04
Loss = 4.4748e-04, PNorm = 155.2527, GNorm = 0.0403, lr_0 = 1.0476e-04
Loss = 1.1666e-03, PNorm = 155.2544, GNorm = 0.0778, lr_0 = 1.0469e-04
Loss = 4.9143e-04, PNorm = 155.2562, GNorm = 0.0691, lr_0 = 1.0462e-04
Loss = 8.6028e-04, PNorm = 155.2588, GNorm = 0.0653, lr_0 = 1.0455e-04
Loss = 1.8432e-03, PNorm = 155.2586, GNorm = 0.0805, lr_0 = 1.0448e-04
Loss = 1.4001e-03, PNorm = 155.2592, GNorm = 0.1398, lr_0 = 1.0440e-04
Loss = 7.3264e-04, PNorm = 155.2613, GNorm = 0.1015, lr_0 = 1.0433e-04
Loss = 6.2809e-04, PNorm = 155.2614, GNorm = 0.0430, lr_0 = 1.0426e-04
Loss = 4.6336e-04, PNorm = 155.2631, GNorm = 0.0313, lr_0 = 1.0419e-04
Loss = 1.1158e-03, PNorm = 155.2645, GNorm = 0.0375, lr_0 = 1.0412e-04
Loss = 1.0165e-03, PNorm = 155.2664, GNorm = 0.0451, lr_0 = 1.0405e-04
Loss = 1.3471e-03, PNorm = 155.2662, GNorm = 0.0954, lr_0 = 1.0398e-04
Loss = 2.2915e-03, PNorm = 155.2673, GNorm = 0.0183, lr_0 = 1.0391e-04
Loss = 2.5137e-03, PNorm = 155.2685, GNorm = 0.0289, lr_0 = 1.0383e-04
Loss = 4.1991e-04, PNorm = 155.2680, GNorm = 0.0637, lr_0 = 1.0376e-04
Loss = 3.8073e-04, PNorm = 155.2687, GNorm = 0.0746, lr_0 = 1.0369e-04
Loss = 1.3806e-03, PNorm = 155.2695, GNorm = 0.0832, lr_0 = 1.0362e-04
Loss = 4.9171e-04, PNorm = 155.2707, GNorm = 0.0717, lr_0 = 1.0355e-04
Loss = 1.9866e-03, PNorm = 155.2722, GNorm = 0.0549, lr_0 = 1.0348e-04
Loss = 5.6216e-04, PNorm = 155.2733, GNorm = 0.0721, lr_0 = 1.0341e-04
Loss = 8.4909e-04, PNorm = 155.2747, GNorm = 0.2496, lr_0 = 1.0334e-04
Loss = 6.9013e-04, PNorm = 155.2760, GNorm = 0.1122, lr_0 = 1.0327e-04
Loss = 5.7700e-04, PNorm = 155.2773, GNorm = 0.0275, lr_0 = 1.0320e-04
Loss = 2.9937e-03, PNorm = 155.2779, GNorm = 0.0887, lr_0 = 1.0312e-04
Loss = 1.1166e-03, PNorm = 155.2789, GNorm = 0.0720, lr_0 = 1.0305e-04
Loss = 8.0652e-04, PNorm = 155.2808, GNorm = 0.0496, lr_0 = 1.0298e-04
Loss = 1.2951e-03, PNorm = 155.2824, GNorm = 0.0375, lr_0 = 1.0291e-04
Loss = 4.3566e-04, PNorm = 155.2850, GNorm = 0.1621, lr_0 = 1.0284e-04
Loss = 9.3910e-04, PNorm = 155.2849, GNorm = 0.0847, lr_0 = 1.0277e-04
Loss = 1.3311e-03, PNorm = 155.2862, GNorm = 0.0642, lr_0 = 1.0270e-04
Loss = 4.5349e-04, PNorm = 155.2874, GNorm = 0.0640, lr_0 = 1.0263e-04
Loss = 4.1681e-04, PNorm = 155.2884, GNorm = 0.0896, lr_0 = 1.0256e-04
Loss = 6.4689e-04, PNorm = 155.2892, GNorm = 0.0627, lr_0 = 1.0249e-04
Loss = 8.1868e-04, PNorm = 155.2903, GNorm = 0.2595, lr_0 = 1.0242e-04
Loss = 3.5092e-03, PNorm = 155.2919, GNorm = 0.2076, lr_0 = 1.0235e-04
Loss = 1.9905e-03, PNorm = 155.2959, GNorm = 0.0888, lr_0 = 1.0228e-04
Loss = 1.3819e-03, PNorm = 155.2962, GNorm = 0.2150, lr_0 = 1.0221e-04
Loss = 4.6172e-04, PNorm = 155.2971, GNorm = 0.0602, lr_0 = 1.0214e-04
Loss = 1.6326e-03, PNorm = 155.2974, GNorm = 0.1130, lr_0 = 1.0207e-04
Loss = 1.0057e-03, PNorm = 155.2963, GNorm = 0.0692, lr_0 = 1.0200e-04
Loss = 1.9572e-03, PNorm = 155.2980, GNorm = 0.0581, lr_0 = 1.0193e-04
Loss = 1.4047e-03, PNorm = 155.2978, GNorm = 0.4033, lr_0 = 1.0186e-04
Loss = 1.0434e-03, PNorm = 155.2988, GNorm = 0.0782, lr_0 = 1.0179e-04
Loss = 4.7710e-04, PNorm = 155.3004, GNorm = 0.0625, lr_0 = 1.0172e-04
Loss = 5.0249e-04, PNorm = 155.3022, GNorm = 0.0811, lr_0 = 1.0165e-04
Loss = 3.5528e-03, PNorm = 155.3029, GNorm = 0.0553, lr_0 = 1.0158e-04
Loss = 3.9044e-04, PNorm = 155.3036, GNorm = 0.1795, lr_0 = 1.0151e-04
Loss = 4.2105e-04, PNorm = 155.3045, GNorm = 0.0978, lr_0 = 1.0144e-04
Loss = 1.7765e-03, PNorm = 155.3043, GNorm = 0.1403, lr_0 = 1.0137e-04
Loss = 6.1474e-04, PNorm = 155.3040, GNorm = 0.0856, lr_0 = 1.0130e-04
Loss = 9.8755e-04, PNorm = 155.3055, GNorm = 0.0736, lr_0 = 1.0123e-04
Loss = 1.1072e-03, PNorm = 155.3073, GNorm = 0.1145, lr_0 = 1.0116e-04
Loss = 1.9577e-03, PNorm = 155.3091, GNorm = 0.1303, lr_0 = 1.0110e-04
Loss = 1.5080e-03, PNorm = 155.3095, GNorm = 0.0833, lr_0 = 1.0103e-04
Loss = 2.0926e-03, PNorm = 155.3118, GNorm = 0.1630, lr_0 = 1.0096e-04
Loss = 8.5789e-04, PNorm = 155.3129, GNorm = 0.0634, lr_0 = 1.0089e-04
Loss = 9.0344e-04, PNorm = 155.3136, GNorm = 0.0334, lr_0 = 1.0082e-04
Loss = 1.1371e-03, PNorm = 155.3143, GNorm = 0.1879, lr_0 = 1.0075e-04
Loss = 5.5693e-04, PNorm = 155.3154, GNorm = 0.1513, lr_0 = 1.0068e-04
Loss = 3.2461e-03, PNorm = 155.3158, GNorm = 0.0737, lr_0 = 1.0061e-04
Loss = 1.2771e-03, PNorm = 155.3169, GNorm = 0.0753, lr_0 = 1.0054e-04
Loss = 7.0250e-04, PNorm = 155.3175, GNorm = 0.0866, lr_0 = 1.0047e-04
Loss = 2.1312e-03, PNorm = 155.3184, GNorm = 0.0918, lr_0 = 1.0041e-04
Loss = 1.1841e-03, PNorm = 155.3182, GNorm = 0.0623, lr_0 = 1.0034e-04
Loss = 2.1611e-03, PNorm = 155.3197, GNorm = 0.0567, lr_0 = 1.0027e-04
Loss = 1.5523e-03, PNorm = 155.3213, GNorm = 0.1171, lr_0 = 1.0020e-04
Loss = 2.0385e-03, PNorm = 155.3235, GNorm = 0.0889, lr_0 = 1.0013e-04
Loss = 1.8340e-03, PNorm = 155.3230, GNorm = 0.0634, lr_0 = 1.0006e-04
Loss = 2.6283e-03, PNorm = 155.3223, GNorm = 0.1549, lr_0 = 1.0000e-04
Validation mae = 0.476370
Model 0 best validation mae = 0.475683 on epoch 23
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.455711
Ensemble test mae = 0.455711
Fold 1
Splitting data with seed 1
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 9.5737e-01, PNorm = 62.2424, GNorm = 1.9897, lr_0 = 1.0413e-04
Loss = 7.8401e-01, PNorm = 62.2532, GNorm = 1.5015, lr_0 = 1.0788e-04
Loss = 5.8280e-01, PNorm = 62.2668, GNorm = 1.5458, lr_0 = 1.1163e-04
Loss = 5.7209e-01, PNorm = 62.2798, GNorm = 2.2268, lr_0 = 1.1537e-04
Loss = 5.0023e-01, PNorm = 62.2900, GNorm = 1.9023, lr_0 = 1.1913e-04
Loss = 4.2980e-01, PNorm = 62.3002, GNorm = 1.7031, lr_0 = 1.2287e-04
Loss = 4.4143e-01, PNorm = 62.3090, GNorm = 2.5094, lr_0 = 1.2663e-04
Loss = 3.8885e-01, PNorm = 62.3189, GNorm = 1.9902, lr_0 = 1.3038e-04
Loss = 4.0948e-01, PNorm = 62.3295, GNorm = 2.3558, lr_0 = 1.3413e-04
Loss = 4.3106e-01, PNorm = 62.3367, GNorm = 5.2094, lr_0 = 1.3788e-04
Loss = 3.6954e-01, PNorm = 62.3455, GNorm = 1.9452, lr_0 = 1.4163e-04
Loss = 2.6808e-01, PNorm = 62.3572, GNorm = 1.4513, lr_0 = 1.4537e-04
Loss = 3.7528e-01, PNorm = 62.3664, GNorm = 1.9778, lr_0 = 1.4913e-04
Loss = 3.5281e-01, PNorm = 62.3746, GNorm = 2.1116, lr_0 = 1.5288e-04
Loss = 2.9762e-01, PNorm = 62.3845, GNorm = 1.7202, lr_0 = 1.5662e-04
Loss = 3.1294e-01, PNorm = 62.3966, GNorm = 1.5625, lr_0 = 1.6038e-04
Loss = 2.8163e-01, PNorm = 62.4059, GNorm = 1.7104, lr_0 = 1.6412e-04
Loss = 3.1849e-01, PNorm = 62.4157, GNorm = 1.5689, lr_0 = 1.6788e-04
Loss = 2.9881e-01, PNorm = 62.4260, GNorm = 2.1138, lr_0 = 1.7163e-04
Loss = 3.5764e-01, PNorm = 62.4391, GNorm = 2.1948, lr_0 = 1.7538e-04
Loss = 3.0395e-01, PNorm = 62.4511, GNorm = 2.3369, lr_0 = 1.7913e-04
Loss = 3.0286e-01, PNorm = 62.4636, GNorm = 1.7777, lr_0 = 1.8288e-04
Loss = 3.0019e-01, PNorm = 62.4768, GNorm = 1.6242, lr_0 = 1.8662e-04
Loss = 3.2603e-01, PNorm = 62.4871, GNorm = 2.8754, lr_0 = 1.9038e-04
Loss = 3.3122e-01, PNorm = 62.5019, GNorm = 1.8954, lr_0 = 1.9413e-04
Loss = 2.6740e-01, PNorm = 62.5178, GNorm = 1.5352, lr_0 = 1.9788e-04
Loss = 3.1347e-01, PNorm = 62.5321, GNorm = 1.8618, lr_0 = 2.0163e-04
Loss = 3.2114e-01, PNorm = 62.5470, GNorm = 1.8219, lr_0 = 2.0537e-04
Loss = 3.1315e-01, PNorm = 62.5616, GNorm = 2.1669, lr_0 = 2.0913e-04
Loss = 2.6644e-01, PNorm = 62.5754, GNorm = 1.5039, lr_0 = 2.1288e-04
Loss = 2.4616e-01, PNorm = 62.5926, GNorm = 1.8144, lr_0 = 2.1663e-04
Loss = 2.6691e-01, PNorm = 62.6065, GNorm = 1.3994, lr_0 = 2.2038e-04
Loss = 3.0638e-01, PNorm = 62.6195, GNorm = 4.0898, lr_0 = 2.2412e-04
Loss = 2.9910e-01, PNorm = 62.6357, GNorm = 1.7334, lr_0 = 2.2787e-04
Loss = 3.1092e-01, PNorm = 62.6547, GNorm = 1.5077, lr_0 = 2.3163e-04
Loss = 2.7272e-01, PNorm = 62.6708, GNorm = 1.2135, lr_0 = 2.3538e-04
Loss = 2.6097e-01, PNorm = 62.6860, GNorm = 1.7369, lr_0 = 2.3913e-04
Loss = 2.8492e-01, PNorm = 62.7010, GNorm = 1.3058, lr_0 = 2.4288e-04
Loss = 2.5765e-01, PNorm = 62.7197, GNorm = 1.2621, lr_0 = 2.4662e-04
Loss = 2.7749e-01, PNorm = 62.7332, GNorm = 1.8356, lr_0 = 2.5038e-04
Loss = 2.6285e-01, PNorm = 62.7517, GNorm = 1.7567, lr_0 = 2.5413e-04
Loss = 2.5837e-01, PNorm = 62.7703, GNorm = 1.4720, lr_0 = 2.5788e-04
Loss = 2.6299e-01, PNorm = 62.7838, GNorm = 1.1221, lr_0 = 2.6163e-04
Loss = 2.5378e-01, PNorm = 62.8058, GNorm = 1.0037, lr_0 = 2.6537e-04
Loss = 2.7412e-01, PNorm = 62.8261, GNorm = 1.2687, lr_0 = 2.6912e-04
Loss = 2.4975e-01, PNorm = 62.8458, GNorm = 1.2872, lr_0 = 2.7288e-04
Loss = 2.3029e-01, PNorm = 62.8659, GNorm = 0.9901, lr_0 = 2.7663e-04
Loss = 2.6166e-01, PNorm = 62.8875, GNorm = 1.3357, lr_0 = 2.8038e-04
Loss = 2.4572e-01, PNorm = 62.9061, GNorm = 1.2335, lr_0 = 2.8413e-04
Loss = 2.8904e-01, PNorm = 62.9218, GNorm = 1.4746, lr_0 = 2.8787e-04
Loss = 2.5030e-01, PNorm = 62.9458, GNorm = 1.1753, lr_0 = 2.9163e-04
Loss = 2.4447e-01, PNorm = 62.9693, GNorm = 1.8922, lr_0 = 2.9538e-04
Loss = 2.7435e-01, PNorm = 62.9891, GNorm = 1.2610, lr_0 = 2.9913e-04
Loss = 2.4455e-01, PNorm = 63.0198, GNorm = 1.7469, lr_0 = 3.0288e-04
Loss = 2.4020e-01, PNorm = 63.0422, GNorm = 1.4833, lr_0 = 3.0662e-04
Loss = 2.6772e-01, PNorm = 63.0665, GNorm = 1.0833, lr_0 = 3.1037e-04
Loss = 2.6778e-01, PNorm = 63.0930, GNorm = 2.0322, lr_0 = 3.1413e-04
Loss = 2.9055e-01, PNorm = 63.1190, GNorm = 1.4816, lr_0 = 3.1788e-04
Loss = 2.1611e-01, PNorm = 63.1509, GNorm = 1.4336, lr_0 = 3.2163e-04
Loss = 2.6099e-01, PNorm = 63.1746, GNorm = 1.0425, lr_0 = 3.2538e-04
Loss = 2.6648e-01, PNorm = 63.2072, GNorm = 1.4664, lr_0 = 3.2912e-04
Loss = 2.8706e-01, PNorm = 63.2341, GNorm = 1.3841, lr_0 = 3.3288e-04
Loss = 2.2211e-01, PNorm = 63.2655, GNorm = 0.9487, lr_0 = 3.3663e-04
Loss = 2.2617e-01, PNorm = 63.2938, GNorm = 1.1222, lr_0 = 3.4038e-04
Loss = 2.6448e-01, PNorm = 63.3203, GNorm = 1.4278, lr_0 = 3.4413e-04
Loss = 2.3229e-01, PNorm = 63.3509, GNorm = 1.0242, lr_0 = 3.4787e-04
Loss = 2.3497e-01, PNorm = 63.3812, GNorm = 1.5513, lr_0 = 3.5162e-04
Loss = 2.4756e-01, PNorm = 63.4118, GNorm = 0.9751, lr_0 = 3.5538e-04
Loss = 2.4580e-01, PNorm = 63.4483, GNorm = 1.3904, lr_0 = 3.5913e-04
Loss = 2.4175e-01, PNorm = 63.4801, GNorm = 1.3833, lr_0 = 3.6288e-04
Loss = 2.3782e-01, PNorm = 63.5100, GNorm = 1.5823, lr_0 = 3.6662e-04
Loss = 2.4290e-01, PNorm = 63.5479, GNorm = 1.1643, lr_0 = 3.7037e-04
Loss = 2.1696e-01, PNorm = 63.5819, GNorm = 0.8110, lr_0 = 3.7413e-04
Loss = 2.6907e-01, PNorm = 63.6178, GNorm = 1.6121, lr_0 = 3.7788e-04
Loss = 2.2521e-01, PNorm = 63.6506, GNorm = 1.0298, lr_0 = 3.8163e-04
Loss = 2.5706e-01, PNorm = 63.6815, GNorm = 1.1234, lr_0 = 3.8537e-04
Loss = 2.1984e-01, PNorm = 63.7181, GNorm = 1.1994, lr_0 = 3.8912e-04
Loss = 2.2611e-01, PNorm = 63.7448, GNorm = 0.9244, lr_0 = 3.9287e-04
Loss = 2.2064e-01, PNorm = 63.7769, GNorm = 1.0475, lr_0 = 3.9663e-04
Loss = 2.4585e-01, PNorm = 63.8072, GNorm = 1.4755, lr_0 = 4.0038e-04
Loss = 2.6881e-01, PNorm = 63.8446, GNorm = 1.5827, lr_0 = 4.0413e-04
Loss = 2.2541e-01, PNorm = 63.8846, GNorm = 0.9467, lr_0 = 4.0787e-04
Loss = 2.1851e-01, PNorm = 63.9268, GNorm = 0.9051, lr_0 = 4.1162e-04
Loss = 2.1869e-01, PNorm = 63.9531, GNorm = 1.5794, lr_0 = 4.1537e-04
Loss = 2.5164e-01, PNorm = 63.9882, GNorm = 0.6713, lr_0 = 4.1913e-04
Loss = 2.4011e-01, PNorm = 64.0264, GNorm = 1.0065, lr_0 = 4.2288e-04
Loss = 2.2263e-01, PNorm = 64.0676, GNorm = 0.9948, lr_0 = 4.2662e-04
Loss = 2.4044e-01, PNorm = 64.1100, GNorm = 1.6680, lr_0 = 4.3037e-04
Loss = 2.3875e-01, PNorm = 64.1447, GNorm = 1.1010, lr_0 = 4.3412e-04
Loss = 2.1887e-01, PNorm = 64.1854, GNorm = 1.1039, lr_0 = 4.3788e-04
Loss = 2.4522e-01, PNorm = 64.2265, GNorm = 1.7063, lr_0 = 4.4163e-04
Loss = 2.4042e-01, PNorm = 64.2688, GNorm = 0.8740, lr_0 = 4.4538e-04
Loss = 2.3252e-01, PNorm = 64.3168, GNorm = 1.4369, lr_0 = 4.4912e-04
Loss = 2.3487e-01, PNorm = 64.3608, GNorm = 1.2682, lr_0 = 4.5287e-04
Loss = 2.5903e-01, PNorm = 64.4073, GNorm = 1.3467, lr_0 = 4.5662e-04
Loss = 2.3883e-01, PNorm = 64.4499, GNorm = 1.1522, lr_0 = 4.6038e-04
Loss = 2.1346e-01, PNorm = 64.4885, GNorm = 0.9745, lr_0 = 4.6413e-04
Loss = 2.3988e-01, PNorm = 64.5369, GNorm = 1.2882, lr_0 = 4.6787e-04
Loss = 2.3242e-01, PNorm = 64.5804, GNorm = 1.1810, lr_0 = 4.7162e-04
Loss = 2.2912e-01, PNorm = 64.6233, GNorm = 0.9691, lr_0 = 4.7537e-04
Loss = 2.4112e-01, PNorm = 64.6658, GNorm = 0.9808, lr_0 = 4.7913e-04
Loss = 2.2018e-01, PNorm = 64.7109, GNorm = 1.0627, lr_0 = 4.8288e-04
Loss = 1.8939e-01, PNorm = 64.7583, GNorm = 1.1278, lr_0 = 4.8663e-04
Loss = 1.9633e-01, PNorm = 64.8015, GNorm = 0.8290, lr_0 = 4.9038e-04
Loss = 2.2296e-01, PNorm = 64.8464, GNorm = 1.1959, lr_0 = 4.9412e-04
Loss = 2.1883e-01, PNorm = 64.8897, GNorm = 0.7699, lr_0 = 4.9788e-04
Loss = 2.0990e-01, PNorm = 64.9485, GNorm = 0.8308, lr_0 = 5.0163e-04
Loss = 2.1062e-01, PNorm = 64.9954, GNorm = 0.8714, lr_0 = 5.0538e-04
Loss = 2.1470e-01, PNorm = 65.0512, GNorm = 1.0344, lr_0 = 5.0913e-04
Loss = 2.3212e-01, PNorm = 65.1006, GNorm = 1.7131, lr_0 = 5.1287e-04
Loss = 1.8527e-01, PNorm = 65.1478, GNorm = 0.8396, lr_0 = 5.1663e-04
Loss = 1.8791e-01, PNorm = 65.1911, GNorm = 0.8880, lr_0 = 5.2038e-04
Loss = 2.0738e-01, PNorm = 65.2345, GNorm = 0.9381, lr_0 = 5.2413e-04
Loss = 2.1054e-01, PNorm = 65.2836, GNorm = 1.0501, lr_0 = 5.2788e-04
Loss = 2.0573e-01, PNorm = 65.3364, GNorm = 1.1250, lr_0 = 5.3162e-04
Loss = 2.0741e-01, PNorm = 65.3844, GNorm = 0.9608, lr_0 = 5.3538e-04
Loss = 2.0256e-01, PNorm = 65.4427, GNorm = 0.9526, lr_0 = 5.3912e-04
Loss = 2.1526e-01, PNorm = 65.4911, GNorm = 0.8989, lr_0 = 5.4288e-04
Loss = 2.1539e-01, PNorm = 65.5459, GNorm = 1.1491, lr_0 = 5.4663e-04
Loss = 2.1841e-01, PNorm = 65.6016, GNorm = 0.8808, lr_0 = 5.5038e-04
Validation mae = 0.550630
Epoch 1
Loss = 1.3615e-01, PNorm = 65.6558, GNorm = 0.9153, lr_0 = 5.5413e-04
Loss = 1.4150e-01, PNorm = 65.7049, GNorm = 1.0331, lr_0 = 5.5787e-04
Loss = 1.3651e-01, PNorm = 65.7473, GNorm = 0.8213, lr_0 = 5.6163e-04
Loss = 1.4753e-01, PNorm = 65.7924, GNorm = 0.7173, lr_0 = 5.6538e-04
Loss = 1.5751e-01, PNorm = 65.8430, GNorm = 0.9134, lr_0 = 5.6913e-04
Loss = 1.2797e-01, PNorm = 65.8926, GNorm = 0.7372, lr_0 = 5.7288e-04
Loss = 1.4551e-01, PNorm = 65.9432, GNorm = 0.7310, lr_0 = 5.7662e-04
Loss = 1.3503e-01, PNorm = 65.9945, GNorm = 0.7909, lr_0 = 5.8038e-04
Loss = 1.2776e-01, PNorm = 66.0471, GNorm = 0.6519, lr_0 = 5.8413e-04
Loss = 1.6839e-01, PNorm = 66.1015, GNorm = 0.8143, lr_0 = 5.8788e-04
Loss = 1.2961e-01, PNorm = 66.1638, GNorm = 0.6340, lr_0 = 5.9163e-04
Loss = 1.3975e-01, PNorm = 66.2197, GNorm = 0.8861, lr_0 = 5.9538e-04
Loss = 1.3765e-01, PNorm = 66.2694, GNorm = 0.7363, lr_0 = 5.9913e-04
Loss = 1.4641e-01, PNorm = 66.3321, GNorm = 0.7440, lr_0 = 6.0288e-04
Loss = 1.4327e-01, PNorm = 66.3962, GNorm = 1.0208, lr_0 = 6.0663e-04
Loss = 1.3152e-01, PNorm = 66.4562, GNorm = 0.7672, lr_0 = 6.1038e-04
Loss = 1.4126e-01, PNorm = 66.5254, GNorm = 0.8553, lr_0 = 6.1413e-04
Loss = 1.5434e-01, PNorm = 66.5819, GNorm = 0.7574, lr_0 = 6.1788e-04
Loss = 1.4131e-01, PNorm = 66.6573, GNorm = 0.7569, lr_0 = 6.2163e-04
Loss = 1.5575e-01, PNorm = 66.7232, GNorm = 0.6672, lr_0 = 6.2538e-04
Loss = 1.6912e-01, PNorm = 66.7989, GNorm = 0.8367, lr_0 = 6.2913e-04
Loss = 1.4358e-01, PNorm = 66.8715, GNorm = 0.7380, lr_0 = 6.3288e-04
Loss = 1.5430e-01, PNorm = 66.9469, GNorm = 0.7416, lr_0 = 6.3663e-04
Loss = 1.6866e-01, PNorm = 67.0187, GNorm = 0.8578, lr_0 = 6.4038e-04
Loss = 1.5181e-01, PNorm = 67.0945, GNorm = 1.3347, lr_0 = 6.4413e-04
Loss = 1.6800e-01, PNorm = 67.1715, GNorm = 0.8786, lr_0 = 6.4788e-04
Loss = 1.5611e-01, PNorm = 67.2553, GNorm = 0.9958, lr_0 = 6.5163e-04
Loss = 1.6027e-01, PNorm = 67.3295, GNorm = 0.6344, lr_0 = 6.5538e-04
Loss = 1.5579e-01, PNorm = 67.4109, GNorm = 0.7601, lr_0 = 6.5913e-04
Loss = 1.7135e-01, PNorm = 67.4851, GNorm = 0.8336, lr_0 = 6.6288e-04
Loss = 1.5833e-01, PNorm = 67.5751, GNorm = 1.0223, lr_0 = 6.6663e-04
Loss = 1.4017e-01, PNorm = 67.6493, GNorm = 0.9767, lr_0 = 6.7038e-04
Loss = 1.5686e-01, PNorm = 67.7315, GNorm = 0.9158, lr_0 = 6.7413e-04
Loss = 2.0027e-01, PNorm = 67.8111, GNorm = 0.8158, lr_0 = 6.7788e-04
Loss = 1.8770e-01, PNorm = 67.9145, GNorm = 0.7910, lr_0 = 6.8163e-04
Loss = 1.7518e-01, PNorm = 68.0031, GNorm = 1.1730, lr_0 = 6.8538e-04
Loss = 1.7591e-01, PNorm = 68.0893, GNorm = 0.8475, lr_0 = 6.8913e-04
Loss = 1.5264e-01, PNorm = 68.1807, GNorm = 0.7347, lr_0 = 6.9288e-04
Loss = 1.6191e-01, PNorm = 68.2682, GNorm = 0.8112, lr_0 = 6.9663e-04
Loss = 1.7340e-01, PNorm = 68.3584, GNorm = 0.9576, lr_0 = 7.0038e-04
Loss = 1.6362e-01, PNorm = 68.4612, GNorm = 0.7945, lr_0 = 7.0413e-04
Loss = 1.7176e-01, PNorm = 68.5521, GNorm = 1.1309, lr_0 = 7.0788e-04
Loss = 1.6853e-01, PNorm = 68.6466, GNorm = 1.2969, lr_0 = 7.1163e-04
Loss = 1.8327e-01, PNorm = 68.7320, GNorm = 0.6483, lr_0 = 7.1538e-04
Loss = 1.6260e-01, PNorm = 68.8340, GNorm = 0.8312, lr_0 = 7.1913e-04
Loss = 1.6986e-01, PNorm = 68.9320, GNorm = 1.1519, lr_0 = 7.2288e-04
Loss = 1.6803e-01, PNorm = 69.0267, GNorm = 1.2524, lr_0 = 7.2663e-04
Loss = 1.6888e-01, PNorm = 69.1233, GNorm = 0.8378, lr_0 = 7.3038e-04
Loss = 1.6882e-01, PNorm = 69.2052, GNorm = 1.1507, lr_0 = 7.3413e-04
Loss = 1.8290e-01, PNorm = 69.3094, GNorm = 0.9046, lr_0 = 7.3788e-04
Loss = 1.9034e-01, PNorm = 69.4033, GNorm = 0.6119, lr_0 = 7.4163e-04
Loss = 1.9355e-01, PNorm = 69.5110, GNorm = 0.9672, lr_0 = 7.4538e-04
Loss = 1.7227e-01, PNorm = 69.6095, GNorm = 0.8101, lr_0 = 7.4913e-04
Loss = 1.8068e-01, PNorm = 69.7149, GNorm = 0.8240, lr_0 = 7.5288e-04
Loss = 2.0366e-01, PNorm = 69.8160, GNorm = 1.2146, lr_0 = 7.5663e-04
Loss = 1.7562e-01, PNorm = 69.9238, GNorm = 0.9329, lr_0 = 7.6038e-04
Loss = 1.5982e-01, PNorm = 70.0278, GNorm = 0.7814, lr_0 = 7.6413e-04
Loss = 1.7027e-01, PNorm = 70.1237, GNorm = 1.2030, lr_0 = 7.6788e-04
Loss = 1.8821e-01, PNorm = 70.2195, GNorm = 0.6381, lr_0 = 7.7163e-04
Loss = 1.6849e-01, PNorm = 70.3226, GNorm = 0.7687, lr_0 = 7.7538e-04
Loss = 1.6746e-01, PNorm = 70.4237, GNorm = 0.8651, lr_0 = 7.7913e-04
Loss = 1.7323e-01, PNorm = 70.5208, GNorm = 0.8848, lr_0 = 7.8288e-04
Loss = 1.7733e-01, PNorm = 70.6295, GNorm = 0.8502, lr_0 = 7.8663e-04
Loss = 1.6104e-01, PNorm = 70.7323, GNorm = 1.0198, lr_0 = 7.9038e-04
Loss = 1.8353e-01, PNorm = 70.8412, GNorm = 1.4915, lr_0 = 7.9413e-04
Loss = 1.8078e-01, PNorm = 70.9498, GNorm = 0.9093, lr_0 = 7.9788e-04
Loss = 1.5930e-01, PNorm = 71.0638, GNorm = 0.8919, lr_0 = 8.0163e-04
Loss = 1.5260e-01, PNorm = 71.1644, GNorm = 0.8800, lr_0 = 8.0538e-04
Loss = 1.5517e-01, PNorm = 71.2628, GNorm = 0.8991, lr_0 = 8.0913e-04
Loss = 1.6900e-01, PNorm = 71.3590, GNorm = 0.8968, lr_0 = 8.1288e-04
Loss = 1.4702e-01, PNorm = 71.4654, GNorm = 0.6304, lr_0 = 8.1663e-04
Loss = 1.7959e-01, PNorm = 71.5699, GNorm = 0.8227, lr_0 = 8.2038e-04
Loss = 2.0091e-01, PNorm = 71.6714, GNorm = 0.6211, lr_0 = 8.2413e-04
Loss = 1.7716e-01, PNorm = 71.7930, GNorm = 0.8181, lr_0 = 8.2788e-04
Loss = 1.4969e-01, PNorm = 71.8984, GNorm = 0.8037, lr_0 = 8.3163e-04
Loss = 1.7442e-01, PNorm = 72.0083, GNorm = 0.6260, lr_0 = 8.3538e-04
Loss = 1.7167e-01, PNorm = 72.1085, GNorm = 0.8945, lr_0 = 8.3913e-04
Loss = 2.0193e-01, PNorm = 72.2219, GNorm = 0.7899, lr_0 = 8.4288e-04
Loss = 1.6342e-01, PNorm = 72.3419, GNorm = 0.7181, lr_0 = 8.4663e-04
Loss = 1.5688e-01, PNorm = 72.4578, GNorm = 0.7146, lr_0 = 8.5038e-04
Loss = 1.6456e-01, PNorm = 72.5614, GNorm = 0.8128, lr_0 = 8.5413e-04
Loss = 1.5858e-01, PNorm = 72.6674, GNorm = 0.5885, lr_0 = 8.5788e-04
Loss = 1.5395e-01, PNorm = 72.7662, GNorm = 1.2558, lr_0 = 8.6163e-04
Loss = 1.6675e-01, PNorm = 72.8642, GNorm = 0.9260, lr_0 = 8.6538e-04
Loss = 1.7776e-01, PNorm = 72.9783, GNorm = 1.0543, lr_0 = 8.6913e-04
Loss = 1.7889e-01, PNorm = 73.0937, GNorm = 0.7976, lr_0 = 8.7288e-04
Loss = 1.4828e-01, PNorm = 73.2182, GNorm = 0.5655, lr_0 = 8.7663e-04
Loss = 2.1508e-01, PNorm = 73.3278, GNorm = 0.9667, lr_0 = 8.8038e-04
Loss = 1.6294e-01, PNorm = 73.4535, GNorm = 1.2089, lr_0 = 8.8413e-04
Loss = 1.7019e-01, PNorm = 73.5701, GNorm = 0.7774, lr_0 = 8.8788e-04
Loss = 1.7351e-01, PNorm = 73.6829, GNorm = 0.8335, lr_0 = 8.9163e-04
Loss = 1.6160e-01, PNorm = 73.7961, GNorm = 0.7046, lr_0 = 8.9538e-04
Loss = 1.5763e-01, PNorm = 73.9054, GNorm = 0.5571, lr_0 = 8.9913e-04
Loss = 1.6249e-01, PNorm = 74.0237, GNorm = 0.5812, lr_0 = 9.0288e-04
Loss = 1.7608e-01, PNorm = 74.1433, GNorm = 0.9579, lr_0 = 9.0663e-04
Loss = 1.7142e-01, PNorm = 74.2668, GNorm = 0.9575, lr_0 = 9.1038e-04
Loss = 1.7180e-01, PNorm = 74.3817, GNorm = 1.0087, lr_0 = 9.1413e-04
Loss = 2.0757e-01, PNorm = 74.5113, GNorm = 0.9573, lr_0 = 9.1788e-04
Loss = 1.8346e-01, PNorm = 74.6542, GNorm = 0.7681, lr_0 = 9.2163e-04
Loss = 2.0433e-01, PNorm = 74.7862, GNorm = 0.9247, lr_0 = 9.2538e-04
Loss = 1.5542e-01, PNorm = 74.9203, GNorm = 0.7258, lr_0 = 9.2913e-04
Loss = 1.7477e-01, PNorm = 75.0540, GNorm = 0.7083, lr_0 = 9.3288e-04
Loss = 1.8766e-01, PNorm = 75.1727, GNorm = 0.8350, lr_0 = 9.3663e-04
Loss = 1.7477e-01, PNorm = 75.3022, GNorm = 0.9983, lr_0 = 9.4038e-04
Loss = 2.0011e-01, PNorm = 75.4381, GNorm = 0.6961, lr_0 = 9.4413e-04
Loss = 1.7508e-01, PNorm = 75.5799, GNorm = 0.6799, lr_0 = 9.4788e-04
Loss = 1.8902e-01, PNorm = 75.7272, GNorm = 0.6602, lr_0 = 9.5163e-04
Loss = 1.7967e-01, PNorm = 75.8663, GNorm = 1.4822, lr_0 = 9.5538e-04
Loss = 1.9520e-01, PNorm = 76.0080, GNorm = 0.7963, lr_0 = 9.5913e-04
Loss = 2.0061e-01, PNorm = 76.1578, GNorm = 0.6208, lr_0 = 9.6288e-04
Loss = 1.7579e-01, PNorm = 76.2953, GNorm = 0.6775, lr_0 = 9.6663e-04
Loss = 1.6937e-01, PNorm = 76.4259, GNorm = 0.7529, lr_0 = 9.7038e-04
Loss = 1.5660e-01, PNorm = 76.5459, GNorm = 0.8680, lr_0 = 9.7413e-04
Loss = 2.0992e-01, PNorm = 76.6714, GNorm = 0.8640, lr_0 = 9.7788e-04
Loss = 1.6443e-01, PNorm = 76.7914, GNorm = 1.2086, lr_0 = 9.8163e-04
Loss = 1.8065e-01, PNorm = 76.9238, GNorm = 1.2667, lr_0 = 9.8537e-04
Loss = 1.7413e-01, PNorm = 77.0402, GNorm = 0.6356, lr_0 = 9.8912e-04
Loss = 1.6608e-01, PNorm = 77.1603, GNorm = 0.6665, lr_0 = 9.9288e-04
Loss = 1.6647e-01, PNorm = 77.2809, GNorm = 0.8032, lr_0 = 9.9663e-04
Loss = 1.7711e-01, PNorm = 77.4048, GNorm = 0.7990, lr_0 = 9.9993e-04
Validation mae = 0.544734
Epoch 2
Loss = 1.0700e-01, PNorm = 77.5387, GNorm = 0.5472, lr_0 = 9.9925e-04
Loss = 1.0897e-01, PNorm = 77.6502, GNorm = 0.5264, lr_0 = 9.9856e-04
Loss = 9.7748e-02, PNorm = 77.7557, GNorm = 0.4784, lr_0 = 9.9788e-04
Loss = 1.0702e-01, PNorm = 77.8541, GNorm = 0.6120, lr_0 = 9.9719e-04
Loss = 1.2010e-01, PNorm = 77.9587, GNorm = 0.6393, lr_0 = 9.9651e-04
Loss = 1.0782e-01, PNorm = 78.0613, GNorm = 0.6919, lr_0 = 9.9583e-04
Loss = 9.4180e-02, PNorm = 78.1559, GNorm = 0.4968, lr_0 = 9.9515e-04
Loss = 1.1746e-01, PNorm = 78.2686, GNorm = 0.5395, lr_0 = 9.9446e-04
Loss = 1.2166e-01, PNorm = 78.3779, GNorm = 0.6081, lr_0 = 9.9378e-04
Loss = 1.0294e-01, PNorm = 78.4837, GNorm = 0.6017, lr_0 = 9.9310e-04
Loss = 1.2044e-01, PNorm = 78.6016, GNorm = 0.7745, lr_0 = 9.9242e-04
Loss = 1.1274e-01, PNorm = 78.7036, GNorm = 0.7541, lr_0 = 9.9174e-04
Loss = 1.1604e-01, PNorm = 78.8196, GNorm = 0.8992, lr_0 = 9.9106e-04
Loss = 1.0035e-01, PNorm = 78.9132, GNorm = 0.7214, lr_0 = 9.9038e-04
Loss = 1.0817e-01, PNorm = 79.0160, GNorm = 0.5148, lr_0 = 9.8971e-04
Loss = 9.3302e-02, PNorm = 79.1228, GNorm = 0.4326, lr_0 = 9.8903e-04
Loss = 9.0792e-02, PNorm = 79.2153, GNorm = 0.8285, lr_0 = 9.8835e-04
Loss = 1.2363e-01, PNorm = 79.2985, GNorm = 0.6549, lr_0 = 9.8767e-04
Loss = 1.0154e-01, PNorm = 79.3995, GNorm = 0.3939, lr_0 = 9.8700e-04
Loss = 8.8138e-02, PNorm = 79.4875, GNorm = 0.4812, lr_0 = 9.8632e-04
Loss = 1.0658e-01, PNorm = 79.5708, GNorm = 0.7655, lr_0 = 9.8564e-04
Loss = 1.0939e-01, PNorm = 79.6704, GNorm = 0.9984, lr_0 = 9.8497e-04
Loss = 9.6736e-02, PNorm = 79.7654, GNorm = 0.5706, lr_0 = 9.8429e-04
Loss = 1.0728e-01, PNorm = 79.8628, GNorm = 0.3593, lr_0 = 9.8362e-04
Loss = 1.1141e-01, PNorm = 79.9722, GNorm = 0.8893, lr_0 = 9.8295e-04
Loss = 1.1005e-01, PNorm = 80.0735, GNorm = 0.7345, lr_0 = 9.8227e-04
Loss = 1.1427e-01, PNorm = 80.1848, GNorm = 0.7390, lr_0 = 9.8160e-04
Loss = 9.6070e-02, PNorm = 80.2936, GNorm = 0.7566, lr_0 = 9.8093e-04
Loss = 9.9405e-02, PNorm = 80.3953, GNorm = 0.8498, lr_0 = 9.8026e-04
Loss = 1.1597e-01, PNorm = 80.4862, GNorm = 0.4589, lr_0 = 9.7958e-04
Loss = 1.1053e-01, PNorm = 80.5900, GNorm = 0.5413, lr_0 = 9.7891e-04
Loss = 1.0079e-01, PNorm = 80.6934, GNorm = 0.4316, lr_0 = 9.7824e-04
Loss = 1.1393e-01, PNorm = 80.7987, GNorm = 0.7484, lr_0 = 9.7757e-04
Loss = 1.0344e-01, PNorm = 80.9063, GNorm = 0.4876, lr_0 = 9.7690e-04
Loss = 9.8372e-02, PNorm = 81.0109, GNorm = 0.4184, lr_0 = 9.7623e-04
Loss = 1.2137e-01, PNorm = 81.1159, GNorm = 0.6659, lr_0 = 9.7556e-04
Loss = 1.0220e-01, PNorm = 81.2277, GNorm = 0.5085, lr_0 = 9.7490e-04
Loss = 1.0775e-01, PNorm = 81.3403, GNorm = 0.9174, lr_0 = 9.7423e-04
Loss = 1.0501e-01, PNorm = 81.4441, GNorm = 0.4363, lr_0 = 9.7356e-04
Loss = 1.0343e-01, PNorm = 81.5518, GNorm = 0.6439, lr_0 = 9.7289e-04
Loss = 1.1558e-01, PNorm = 81.6559, GNorm = 0.6420, lr_0 = 9.7223e-04
Loss = 1.0832e-01, PNorm = 81.7715, GNorm = 0.4618, lr_0 = 9.7156e-04
Loss = 1.0095e-01, PNorm = 81.8749, GNorm = 0.5742, lr_0 = 9.7090e-04
Loss = 1.1062e-01, PNorm = 81.9812, GNorm = 0.7562, lr_0 = 9.7023e-04
Loss = 1.2238e-01, PNorm = 82.0878, GNorm = 0.7748, lr_0 = 9.6957e-04
Loss = 9.8478e-02, PNorm = 82.1937, GNorm = 0.4881, lr_0 = 9.6890e-04
Loss = 1.1532e-01, PNorm = 82.3036, GNorm = 0.4999, lr_0 = 9.6824e-04
Loss = 1.2019e-01, PNorm = 82.4038, GNorm = 0.5835, lr_0 = 9.6757e-04
Loss = 1.2187e-01, PNorm = 82.5124, GNorm = 0.6175, lr_0 = 9.6691e-04
Loss = 1.1577e-01, PNorm = 82.6228, GNorm = 0.5431, lr_0 = 9.6625e-04
Loss = 1.2041e-01, PNorm = 82.7329, GNorm = 0.5102, lr_0 = 9.6559e-04
Loss = 1.0295e-01, PNorm = 82.8432, GNorm = 0.6879, lr_0 = 9.6493e-04
Loss = 1.1581e-01, PNorm = 82.9449, GNorm = 0.4162, lr_0 = 9.6427e-04
Loss = 1.2295e-01, PNorm = 83.0683, GNorm = 0.6587, lr_0 = 9.6360e-04
Loss = 1.3034e-01, PNorm = 83.1838, GNorm = 0.5992, lr_0 = 9.6294e-04
Loss = 9.4617e-02, PNorm = 83.2968, GNorm = 0.5734, lr_0 = 9.6228e-04
Loss = 1.1081e-01, PNorm = 83.4027, GNorm = 0.5884, lr_0 = 9.6163e-04
Loss = 1.1604e-01, PNorm = 83.5109, GNorm = 1.1929, lr_0 = 9.6097e-04
Loss = 1.2271e-01, PNorm = 83.6221, GNorm = 0.5724, lr_0 = 9.6031e-04
Loss = 1.0615e-01, PNorm = 83.7298, GNorm = 0.6538, lr_0 = 9.5965e-04
Loss = 1.1994e-01, PNorm = 83.8412, GNorm = 0.8051, lr_0 = 9.5899e-04
Loss = 1.0589e-01, PNorm = 83.9548, GNorm = 0.8388, lr_0 = 9.5834e-04
Loss = 1.0132e-01, PNorm = 84.0550, GNorm = 0.4530, lr_0 = 9.5768e-04
Loss = 1.1296e-01, PNorm = 84.1618, GNorm = 0.9478, lr_0 = 9.5702e-04
Loss = 1.0789e-01, PNorm = 84.2709, GNorm = 0.4443, lr_0 = 9.5637e-04
Loss = 9.8638e-02, PNorm = 84.3734, GNorm = 0.5527, lr_0 = 9.5571e-04
Loss = 1.0583e-01, PNorm = 84.4655, GNorm = 0.6570, lr_0 = 9.5506e-04
Loss = 1.0566e-01, PNorm = 84.5637, GNorm = 0.5051, lr_0 = 9.5440e-04
Loss = 1.2371e-01, PNorm = 84.6707, GNorm = 0.7414, lr_0 = 9.5375e-04
Loss = 1.2223e-01, PNorm = 84.7882, GNorm = 0.9383, lr_0 = 9.5310e-04
Loss = 1.0995e-01, PNorm = 84.9041, GNorm = 0.5829, lr_0 = 9.5244e-04
Loss = 1.0211e-01, PNorm = 85.0158, GNorm = 0.9181, lr_0 = 9.5179e-04
Loss = 1.2053e-01, PNorm = 85.1115, GNorm = 0.7541, lr_0 = 9.5114e-04
Loss = 9.9720e-02, PNorm = 85.2272, GNorm = 0.9590, lr_0 = 9.5049e-04
Loss = 1.2216e-01, PNorm = 85.3272, GNorm = 0.7186, lr_0 = 9.4984e-04
Loss = 9.9015e-02, PNorm = 85.4381, GNorm = 0.7226, lr_0 = 9.4919e-04
Loss = 1.0592e-01, PNorm = 85.5336, GNorm = 0.3756, lr_0 = 9.4854e-04
Loss = 1.1981e-01, PNorm = 85.6333, GNorm = 0.7775, lr_0 = 9.4789e-04
Loss = 1.1932e-01, PNorm = 85.7419, GNorm = 0.7494, lr_0 = 9.4724e-04
Loss = 1.3644e-01, PNorm = 85.8550, GNorm = 0.9778, lr_0 = 9.4659e-04
Loss = 1.0157e-01, PNorm = 85.9864, GNorm = 0.4271, lr_0 = 9.4594e-04
Loss = 1.3039e-01, PNorm = 86.1083, GNorm = 0.6055, lr_0 = 9.4529e-04
Loss = 1.1246e-01, PNorm = 86.2306, GNorm = 0.5169, lr_0 = 9.4464e-04
Loss = 1.1445e-01, PNorm = 86.3519, GNorm = 0.4820, lr_0 = 9.4400e-04
Loss = 1.1598e-01, PNorm = 86.4653, GNorm = 0.7591, lr_0 = 9.4335e-04
Loss = 1.1823e-01, PNorm = 86.5766, GNorm = 0.6866, lr_0 = 9.4270e-04
Loss = 1.0964e-01, PNorm = 86.6878, GNorm = 0.5496, lr_0 = 9.4206e-04
Loss = 1.1490e-01, PNorm = 86.7851, GNorm = 0.6630, lr_0 = 9.4141e-04
Loss = 1.2769e-01, PNorm = 86.8935, GNorm = 0.7725, lr_0 = 9.4077e-04
Loss = 1.2903e-01, PNorm = 87.0053, GNorm = 0.4595, lr_0 = 9.4012e-04
Loss = 1.2475e-01, PNorm = 87.1185, GNorm = 0.8663, lr_0 = 9.3948e-04
Loss = 1.2082e-01, PNorm = 87.2303, GNorm = 0.5467, lr_0 = 9.3884e-04
Loss = 1.1925e-01, PNorm = 87.3469, GNorm = 0.6128, lr_0 = 9.3819e-04
Loss = 1.1632e-01, PNorm = 87.4594, GNorm = 0.8160, lr_0 = 9.3755e-04
Loss = 1.2189e-01, PNorm = 87.5801, GNorm = 0.4470, lr_0 = 9.3691e-04
Loss = 1.1277e-01, PNorm = 87.7043, GNorm = 1.0146, lr_0 = 9.3627e-04
Loss = 1.1443e-01, PNorm = 87.8032, GNorm = 0.9971, lr_0 = 9.3562e-04
Loss = 1.1445e-01, PNorm = 87.9112, GNorm = 0.7159, lr_0 = 9.3498e-04
Loss = 1.1786e-01, PNorm = 88.0147, GNorm = 0.8594, lr_0 = 9.3434e-04
Loss = 1.0558e-01, PNorm = 88.1042, GNorm = 0.5813, lr_0 = 9.3370e-04
Loss = 1.1059e-01, PNorm = 88.2011, GNorm = 1.1213, lr_0 = 9.3306e-04
Loss = 1.1929e-01, PNorm = 88.2964, GNorm = 0.4454, lr_0 = 9.3242e-04
Loss = 1.2444e-01, PNorm = 88.3943, GNorm = 0.3798, lr_0 = 9.3178e-04
Loss = 9.8199e-02, PNorm = 88.4964, GNorm = 0.4469, lr_0 = 9.3115e-04
Loss = 1.0279e-01, PNorm = 88.5940, GNorm = 0.6961, lr_0 = 9.3051e-04
Loss = 1.2357e-01, PNorm = 88.6970, GNorm = 0.6226, lr_0 = 9.2987e-04
Loss = 1.2352e-01, PNorm = 88.8059, GNorm = 0.5498, lr_0 = 9.2923e-04
Loss = 1.1950e-01, PNorm = 88.9173, GNorm = 0.8182, lr_0 = 9.2860e-04
Loss = 1.2304e-01, PNorm = 89.0277, GNorm = 0.4994, lr_0 = 9.2796e-04
Loss = 1.1101e-01, PNorm = 89.1398, GNorm = 0.6884, lr_0 = 9.2733e-04
Loss = 1.0791e-01, PNorm = 89.2457, GNorm = 0.4543, lr_0 = 9.2669e-04
Loss = 1.2407e-01, PNorm = 89.3464, GNorm = 0.5982, lr_0 = 9.2606e-04
Loss = 1.1390e-01, PNorm = 89.4479, GNorm = 0.4775, lr_0 = 9.2542e-04
Loss = 1.1933e-01, PNorm = 89.5584, GNorm = 0.3935, lr_0 = 9.2479e-04
Loss = 1.2087e-01, PNorm = 89.6718, GNorm = 0.8168, lr_0 = 9.2415e-04
Loss = 1.2370e-01, PNorm = 89.7736, GNorm = 0.6789, lr_0 = 9.2352e-04
Loss = 1.1806e-01, PNorm = 89.8780, GNorm = 0.8300, lr_0 = 9.2289e-04
Loss = 1.2629e-01, PNorm = 89.9785, GNorm = 0.4895, lr_0 = 9.2226e-04
Loss = 1.0671e-01, PNorm = 90.0779, GNorm = 0.6797, lr_0 = 9.2162e-04
Loss = 1.1101e-01, PNorm = 90.1734, GNorm = 0.6423, lr_0 = 9.2099e-04
Validation mae = 0.518846
Epoch 3
Loss = 8.4346e-02, PNorm = 90.2627, GNorm = 0.6619, lr_0 = 9.2036e-04
Loss = 6.8491e-02, PNorm = 90.3430, GNorm = 0.4031, lr_0 = 9.1973e-04
Loss = 7.4580e-02, PNorm = 90.4065, GNorm = 0.5905, lr_0 = 9.1910e-04
Loss = 7.9524e-02, PNorm = 90.4805, GNorm = 0.7500, lr_0 = 9.1847e-04
Loss = 6.3487e-02, PNorm = 90.5479, GNorm = 0.3241, lr_0 = 9.1784e-04
Loss = 7.6805e-02, PNorm = 90.6158, GNorm = 0.4104, lr_0 = 9.1721e-04
Loss = 6.3245e-02, PNorm = 90.6846, GNorm = 0.4067, lr_0 = 9.1658e-04
Loss = 7.9505e-02, PNorm = 90.7432, GNorm = 0.5902, lr_0 = 9.1596e-04
Loss = 7.2623e-02, PNorm = 90.8203, GNorm = 0.3880, lr_0 = 9.1533e-04
Loss = 7.1569e-02, PNorm = 90.8882, GNorm = 0.4218, lr_0 = 9.1470e-04
Loss = 6.3476e-02, PNorm = 90.9563, GNorm = 0.4443, lr_0 = 9.1408e-04
Loss = 6.3827e-02, PNorm = 91.0143, GNorm = 0.5846, lr_0 = 9.1345e-04
Loss = 5.9799e-02, PNorm = 91.0767, GNorm = 0.5517, lr_0 = 9.1282e-04
Loss = 7.0223e-02, PNorm = 91.1357, GNorm = 0.2839, lr_0 = 9.1220e-04
Loss = 6.7784e-02, PNorm = 91.1995, GNorm = 0.4671, lr_0 = 9.1157e-04
Loss = 7.0854e-02, PNorm = 91.2689, GNorm = 0.2513, lr_0 = 9.1095e-04
Loss = 6.3951e-02, PNorm = 91.3460, GNorm = 0.3716, lr_0 = 9.1032e-04
Loss = 5.8502e-02, PNorm = 91.4131, GNorm = 0.3572, lr_0 = 9.0970e-04
Loss = 6.5269e-02, PNorm = 91.4831, GNorm = 0.4288, lr_0 = 9.0908e-04
Loss = 5.7537e-02, PNorm = 91.5488, GNorm = 0.9571, lr_0 = 9.0846e-04
Loss = 6.5295e-02, PNorm = 91.6144, GNorm = 0.4527, lr_0 = 9.0783e-04
Loss = 7.1299e-02, PNorm = 91.6830, GNorm = 0.7660, lr_0 = 9.0721e-04
Loss = 6.1914e-02, PNorm = 91.7612, GNorm = 0.3652, lr_0 = 9.0659e-04
Loss = 6.0710e-02, PNorm = 91.8260, GNorm = 0.6107, lr_0 = 9.0597e-04
Loss = 5.8182e-02, PNorm = 91.8889, GNorm = 0.3907, lr_0 = 9.0535e-04
Loss = 5.8028e-02, PNorm = 91.9503, GNorm = 0.5812, lr_0 = 9.0473e-04
Loss = 6.6692e-02, PNorm = 92.0183, GNorm = 0.4981, lr_0 = 9.0411e-04
Loss = 6.8498e-02, PNorm = 92.0903, GNorm = 0.4606, lr_0 = 9.0349e-04
Loss = 7.6978e-02, PNorm = 92.1685, GNorm = 0.4157, lr_0 = 9.0287e-04
Loss = 6.5840e-02, PNorm = 92.2434, GNorm = 0.5415, lr_0 = 9.0225e-04
Loss = 6.0662e-02, PNorm = 92.3152, GNorm = 0.8249, lr_0 = 9.0163e-04
Loss = 6.8713e-02, PNorm = 92.3845, GNorm = 0.4232, lr_0 = 9.0102e-04
Loss = 6.3229e-02, PNorm = 92.4623, GNorm = 0.3252, lr_0 = 9.0040e-04
Loss = 6.8060e-02, PNorm = 92.5289, GNorm = 0.3771, lr_0 = 8.9978e-04
Loss = 5.9232e-02, PNorm = 92.6078, GNorm = 0.5627, lr_0 = 8.9916e-04
Loss = 7.2117e-02, PNorm = 92.6756, GNorm = 0.5861, lr_0 = 8.9855e-04
Loss = 8.1602e-02, PNorm = 92.7538, GNorm = 0.8005, lr_0 = 8.9793e-04
Loss = 7.1899e-02, PNorm = 92.8358, GNorm = 0.4358, lr_0 = 8.9732e-04
Loss = 6.5990e-02, PNorm = 92.9217, GNorm = 0.4444, lr_0 = 8.9670e-04
Loss = 6.4580e-02, PNorm = 92.9869, GNorm = 0.3635, lr_0 = 8.9609e-04
Loss = 6.7446e-02, PNorm = 93.0654, GNorm = 0.6469, lr_0 = 8.9548e-04
Loss = 7.4063e-02, PNorm = 93.1280, GNorm = 0.4320, lr_0 = 8.9486e-04
Loss = 6.4628e-02, PNorm = 93.2104, GNorm = 0.6026, lr_0 = 8.9425e-04
Loss = 7.4197e-02, PNorm = 93.2710, GNorm = 0.9572, lr_0 = 8.9364e-04
Loss = 7.4240e-02, PNorm = 93.3517, GNorm = 0.3995, lr_0 = 8.9302e-04
Loss = 6.7386e-02, PNorm = 93.4338, GNorm = 0.3794, lr_0 = 8.9241e-04
Loss = 7.9345e-02, PNorm = 93.5104, GNorm = 0.4392, lr_0 = 8.9180e-04
Loss = 8.3112e-02, PNorm = 93.5834, GNorm = 0.8906, lr_0 = 8.9119e-04
Loss = 6.8112e-02, PNorm = 93.6719, GNorm = 0.6547, lr_0 = 8.9058e-04
Loss = 6.4535e-02, PNorm = 93.7482, GNorm = 0.3911, lr_0 = 8.8997e-04
Loss = 5.9046e-02, PNorm = 93.8253, GNorm = 0.6100, lr_0 = 8.8936e-04
Loss = 7.0433e-02, PNorm = 93.8980, GNorm = 0.9337, lr_0 = 8.8875e-04
Loss = 7.5496e-02, PNorm = 93.9710, GNorm = 0.5236, lr_0 = 8.8814e-04
Loss = 7.2616e-02, PNorm = 94.0517, GNorm = 0.4965, lr_0 = 8.8753e-04
Loss = 6.0551e-02, PNorm = 94.1202, GNorm = 0.7980, lr_0 = 8.8693e-04
Loss = 7.8019e-02, PNorm = 94.1984, GNorm = 0.8030, lr_0 = 8.8632e-04
Loss = 7.7718e-02, PNorm = 94.2741, GNorm = 0.4096, lr_0 = 8.8571e-04
Loss = 6.9893e-02, PNorm = 94.3535, GNorm = 0.4600, lr_0 = 8.8510e-04
Loss = 6.6649e-02, PNorm = 94.4271, GNorm = 0.7831, lr_0 = 8.8450e-04
Loss = 7.0504e-02, PNorm = 94.5001, GNorm = 0.7289, lr_0 = 8.8389e-04
Loss = 6.8806e-02, PNorm = 94.5801, GNorm = 0.7881, lr_0 = 8.8329e-04
Loss = 6.0713e-02, PNorm = 94.6529, GNorm = 0.5218, lr_0 = 8.8268e-04
Loss = 6.7726e-02, PNorm = 94.7222, GNorm = 0.5017, lr_0 = 8.8208e-04
Loss = 7.2166e-02, PNorm = 94.8004, GNorm = 0.3128, lr_0 = 8.8147e-04
Loss = 7.3110e-02, PNorm = 94.8825, GNorm = 0.4568, lr_0 = 8.8087e-04
Loss = 6.7179e-02, PNorm = 94.9453, GNorm = 0.5432, lr_0 = 8.8026e-04
Loss = 8.0960e-02, PNorm = 95.0303, GNorm = 0.6752, lr_0 = 8.7966e-04
Loss = 7.0843e-02, PNorm = 95.1138, GNorm = 0.4542, lr_0 = 8.7906e-04
Loss = 7.3262e-02, PNorm = 95.2043, GNorm = 0.4716, lr_0 = 8.7846e-04
Loss = 6.2335e-02, PNorm = 95.2919, GNorm = 0.5262, lr_0 = 8.7785e-04
Loss = 6.2719e-02, PNorm = 95.3713, GNorm = 0.2872, lr_0 = 8.7725e-04
Loss = 7.0770e-02, PNorm = 95.4459, GNorm = 0.3874, lr_0 = 8.7665e-04
Loss = 6.1082e-02, PNorm = 95.5242, GNorm = 0.3565, lr_0 = 8.7605e-04
Loss = 7.6857e-02, PNorm = 95.5968, GNorm = 0.6746, lr_0 = 8.7545e-04
Loss = 7.7743e-02, PNorm = 95.6803, GNorm = 0.4211, lr_0 = 8.7485e-04
Loss = 6.3134e-02, PNorm = 95.7603, GNorm = 0.3963, lr_0 = 8.7425e-04
Loss = 7.0297e-02, PNorm = 95.8340, GNorm = 0.8965, lr_0 = 8.7365e-04
Loss = 7.9902e-02, PNorm = 95.9077, GNorm = 0.3964, lr_0 = 8.7306e-04
Loss = 6.9859e-02, PNorm = 95.9928, GNorm = 0.3351, lr_0 = 8.7246e-04
Loss = 8.7214e-02, PNorm = 96.0811, GNorm = 0.6235, lr_0 = 8.7186e-04
Loss = 7.0357e-02, PNorm = 96.1696, GNorm = 0.4322, lr_0 = 8.7126e-04
Loss = 7.2158e-02, PNorm = 96.2470, GNorm = 0.7464, lr_0 = 8.7067e-04
Loss = 7.2479e-02, PNorm = 96.3249, GNorm = 1.0246, lr_0 = 8.7007e-04
Loss = 6.8277e-02, PNorm = 96.4070, GNorm = 0.4424, lr_0 = 8.6947e-04
Loss = 6.7451e-02, PNorm = 96.4953, GNorm = 0.8726, lr_0 = 8.6888e-04
Loss = 8.3440e-02, PNorm = 96.5748, GNorm = 0.4483, lr_0 = 8.6828e-04
Loss = 7.3858e-02, PNorm = 96.6662, GNorm = 0.4775, lr_0 = 8.6769e-04
Loss = 7.4433e-02, PNorm = 96.7557, GNorm = 0.6024, lr_0 = 8.6709e-04
Loss = 6.0172e-02, PNorm = 96.8388, GNorm = 0.3702, lr_0 = 8.6650e-04
Loss = 7.7311e-02, PNorm = 96.9174, GNorm = 0.5404, lr_0 = 8.6590e-04
Loss = 7.6078e-02, PNorm = 96.9998, GNorm = 0.4973, lr_0 = 8.6531e-04
Loss = 7.7732e-02, PNorm = 97.0958, GNorm = 0.3802, lr_0 = 8.6472e-04
Loss = 8.7307e-02, PNorm = 97.1864, GNorm = 0.5623, lr_0 = 8.6413e-04
Loss = 6.5638e-02, PNorm = 97.2938, GNorm = 0.3451, lr_0 = 8.6353e-04
Loss = 6.8641e-02, PNorm = 97.3862, GNorm = 0.6371, lr_0 = 8.6294e-04
Loss = 7.4682e-02, PNorm = 97.4695, GNorm = 0.7475, lr_0 = 8.6235e-04
Loss = 7.4605e-02, PNorm = 97.5457, GNorm = 0.3961, lr_0 = 8.6176e-04
Loss = 7.8126e-02, PNorm = 97.6314, GNorm = 0.6756, lr_0 = 8.6117e-04
Loss = 8.1128e-02, PNorm = 97.7159, GNorm = 0.5066, lr_0 = 8.6058e-04
Loss = 8.5590e-02, PNorm = 97.8082, GNorm = 1.1506, lr_0 = 8.5999e-04
Loss = 8.1352e-02, PNorm = 97.8972, GNorm = 0.4045, lr_0 = 8.5940e-04
Loss = 6.8102e-02, PNorm = 97.9918, GNorm = 0.3648, lr_0 = 8.5881e-04
Loss = 7.1905e-02, PNorm = 98.0725, GNorm = 0.4962, lr_0 = 8.5823e-04
Loss = 7.2664e-02, PNorm = 98.1554, GNorm = 0.4670, lr_0 = 8.5764e-04
Loss = 7.5168e-02, PNorm = 98.2421, GNorm = 0.3591, lr_0 = 8.5705e-04
Loss = 7.8697e-02, PNorm = 98.3377, GNorm = 0.8479, lr_0 = 8.5646e-04
Loss = 8.2033e-02, PNorm = 98.4253, GNorm = 0.5821, lr_0 = 8.5588e-04
Loss = 7.8836e-02, PNorm = 98.5273, GNorm = 0.7230, lr_0 = 8.5529e-04
Loss = 7.7705e-02, PNorm = 98.6276, GNorm = 0.4350, lr_0 = 8.5470e-04
Loss = 7.6811e-02, PNorm = 98.7135, GNorm = 0.3883, lr_0 = 8.5412e-04
Loss = 6.9090e-02, PNorm = 98.8061, GNorm = 0.7711, lr_0 = 8.5353e-04
Loss = 8.6834e-02, PNorm = 98.8879, GNorm = 0.9415, lr_0 = 8.5295e-04
Loss = 8.8757e-02, PNorm = 98.9879, GNorm = 0.3789, lr_0 = 8.5236e-04
Loss = 8.6396e-02, PNorm = 99.0813, GNorm = 0.5929, lr_0 = 8.5178e-04
Loss = 8.3999e-02, PNorm = 99.1720, GNorm = 0.4974, lr_0 = 8.5120e-04
Loss = 7.6033e-02, PNorm = 99.2686, GNorm = 0.3994, lr_0 = 8.5061e-04
Loss = 8.1032e-02, PNorm = 99.3582, GNorm = 0.6695, lr_0 = 8.5003e-04
Loss = 9.3895e-02, PNorm = 99.4558, GNorm = 0.4664, lr_0 = 8.4945e-04
Loss = 8.0890e-02, PNorm = 99.5422, GNorm = 0.3969, lr_0 = 8.4887e-04
Loss = 7.3784e-02, PNorm = 99.6384, GNorm = 0.5974, lr_0 = 8.4828e-04
Validation mae = 0.501515
Epoch 4
Loss = 5.3610e-02, PNorm = 99.7193, GNorm = 0.7378, lr_0 = 8.4770e-04
Loss = 5.9986e-02, PNorm = 99.7914, GNorm = 0.8367, lr_0 = 8.4712e-04
Loss = 4.7526e-02, PNorm = 99.8506, GNorm = 0.2658, lr_0 = 8.4654e-04
Loss = 4.8421e-02, PNorm = 99.9064, GNorm = 0.5839, lr_0 = 8.4596e-04
Loss = 5.3782e-02, PNorm = 99.9671, GNorm = 0.5442, lr_0 = 8.4538e-04
Loss = 4.4895e-02, PNorm = 100.0232, GNorm = 0.6333, lr_0 = 8.4480e-04
Loss = 5.3302e-02, PNorm = 100.0864, GNorm = 1.3361, lr_0 = 8.4423e-04
Loss = 4.8083e-02, PNorm = 100.1470, GNorm = 0.6457, lr_0 = 8.4365e-04
Loss = 4.7377e-02, PNorm = 100.1991, GNorm = 0.3369, lr_0 = 8.4307e-04
Loss = 4.2246e-02, PNorm = 100.2594, GNorm = 0.5477, lr_0 = 8.4249e-04
Loss = 4.4143e-02, PNorm = 100.3177, GNorm = 0.3747, lr_0 = 8.4191e-04
Loss = 5.1090e-02, PNorm = 100.3842, GNorm = 0.3242, lr_0 = 8.4134e-04
Loss = 4.8584e-02, PNorm = 100.4345, GNorm = 0.4359, lr_0 = 8.4076e-04
Loss = 4.4468e-02, PNorm = 100.4905, GNorm = 0.7697, lr_0 = 8.4019e-04
Loss = 4.6487e-02, PNorm = 100.5380, GNorm = 1.0219, lr_0 = 8.3961e-04
Loss = 4.3998e-02, PNorm = 100.5942, GNorm = 0.3486, lr_0 = 8.3903e-04
Loss = 4.1864e-02, PNorm = 100.6484, GNorm = 0.4689, lr_0 = 8.3846e-04
Loss = 4.3446e-02, PNorm = 100.7074, GNorm = 0.2197, lr_0 = 8.3789e-04
Loss = 4.1248e-02, PNorm = 100.7609, GNorm = 0.3588, lr_0 = 8.3731e-04
Loss = 4.2282e-02, PNorm = 100.8199, GNorm = 0.3140, lr_0 = 8.3674e-04
Loss = 4.5918e-02, PNorm = 100.8786, GNorm = 0.5819, lr_0 = 8.3616e-04
Loss = 4.8635e-02, PNorm = 100.9382, GNorm = 0.3147, lr_0 = 8.3559e-04
Loss = 4.9632e-02, PNorm = 100.9948, GNorm = 0.3846, lr_0 = 8.3502e-04
Loss = 4.4662e-02, PNorm = 101.0538, GNorm = 0.2937, lr_0 = 8.3445e-04
Loss = 4.7249e-02, PNorm = 101.1108, GNorm = 0.3763, lr_0 = 8.3388e-04
Loss = 4.8494e-02, PNorm = 101.1716, GNorm = 0.7202, lr_0 = 8.3330e-04
Loss = 3.6882e-02, PNorm = 101.2289, GNorm = 0.4451, lr_0 = 8.3273e-04
Loss = 4.7501e-02, PNorm = 101.2838, GNorm = 0.2853, lr_0 = 8.3216e-04
Loss = 4.8728e-02, PNorm = 101.3411, GNorm = 0.3570, lr_0 = 8.3159e-04
Loss = 4.0805e-02, PNorm = 101.4001, GNorm = 0.4131, lr_0 = 8.3102e-04
Loss = 4.3424e-02, PNorm = 101.4485, GNorm = 1.0542, lr_0 = 8.3045e-04
Loss = 4.5635e-02, PNorm = 101.5076, GNorm = 0.4935, lr_0 = 8.2988e-04
Loss = 5.0852e-02, PNorm = 101.5672, GNorm = 0.4101, lr_0 = 8.2932e-04
Loss = 4.2457e-02, PNorm = 101.6245, GNorm = 0.4426, lr_0 = 8.2875e-04
Loss = 5.0546e-02, PNorm = 101.6797, GNorm = 0.4015, lr_0 = 8.2818e-04
Loss = 4.2423e-02, PNorm = 101.7379, GNorm = 0.7841, lr_0 = 8.2761e-04
Loss = 4.2829e-02, PNorm = 101.7944, GNorm = 0.3108, lr_0 = 8.2705e-04
Loss = 4.9583e-02, PNorm = 101.8480, GNorm = 0.5262, lr_0 = 8.2648e-04
Loss = 4.7923e-02, PNorm = 101.9036, GNorm = 0.2724, lr_0 = 8.2591e-04
Loss = 4.6261e-02, PNorm = 101.9551, GNorm = 0.6275, lr_0 = 8.2535e-04
Loss = 4.4917e-02, PNorm = 102.0212, GNorm = 0.5864, lr_0 = 8.2478e-04
Loss = 4.8503e-02, PNorm = 102.0808, GNorm = 0.2848, lr_0 = 8.2422e-04
Loss = 5.5800e-02, PNorm = 102.1416, GNorm = 0.2518, lr_0 = 8.2365e-04
Loss = 4.6602e-02, PNorm = 102.2037, GNorm = 0.3959, lr_0 = 8.2309e-04
Loss = 4.5821e-02, PNorm = 102.2656, GNorm = 0.6432, lr_0 = 8.2252e-04
Loss = 4.4453e-02, PNorm = 102.3304, GNorm = 0.5325, lr_0 = 8.2196e-04
Loss = 4.3988e-02, PNorm = 102.3978, GNorm = 0.3333, lr_0 = 8.2140e-04
Loss = 5.2322e-02, PNorm = 102.4661, GNorm = 0.3865, lr_0 = 8.2084e-04
Loss = 4.7078e-02, PNorm = 102.5266, GNorm = 0.6181, lr_0 = 8.2027e-04
Loss = 4.6592e-02, PNorm = 102.5877, GNorm = 0.4301, lr_0 = 8.1971e-04
Loss = 4.0918e-02, PNorm = 102.6529, GNorm = 0.2736, lr_0 = 8.1915e-04
Loss = 5.4090e-02, PNorm = 102.7209, GNorm = 0.4202, lr_0 = 8.1859e-04
Loss = 4.4173e-02, PNorm = 102.7868, GNorm = 0.4128, lr_0 = 8.1803e-04
Loss = 4.8625e-02, PNorm = 102.8623, GNorm = 0.6201, lr_0 = 8.1747e-04
Loss = 5.2829e-02, PNorm = 102.9330, GNorm = 0.4918, lr_0 = 8.1691e-04
Loss = 4.5538e-02, PNorm = 103.0063, GNorm = 0.4088, lr_0 = 8.1635e-04
Loss = 4.8084e-02, PNorm = 103.0664, GNorm = 0.6126, lr_0 = 8.1579e-04
Loss = 4.4594e-02, PNorm = 103.1263, GNorm = 0.6327, lr_0 = 8.1523e-04
Loss = 4.5949e-02, PNorm = 103.1788, GNorm = 0.7303, lr_0 = 8.1467e-04
Loss = 5.8037e-02, PNorm = 103.2511, GNorm = 1.2211, lr_0 = 8.1411e-04
Loss = 4.6897e-02, PNorm = 103.3132, GNorm = 0.5302, lr_0 = 8.1355e-04
Loss = 5.8668e-02, PNorm = 103.3822, GNorm = 0.9500, lr_0 = 8.1300e-04
Loss = 4.9681e-02, PNorm = 103.4589, GNorm = 0.5569, lr_0 = 8.1244e-04
Loss = 4.7873e-02, PNorm = 103.5287, GNorm = 0.5767, lr_0 = 8.1188e-04
Loss = 5.0635e-02, PNorm = 103.6007, GNorm = 0.5775, lr_0 = 8.1133e-04
Loss = 5.3160e-02, PNorm = 103.6737, GNorm = 0.4835, lr_0 = 8.1077e-04
Loss = 4.6349e-02, PNorm = 103.7514, GNorm = 0.3348, lr_0 = 8.1022e-04
Loss = 4.7950e-02, PNorm = 103.8255, GNorm = 0.3013, lr_0 = 8.0966e-04
Loss = 5.4133e-02, PNorm = 103.8943, GNorm = 0.5149, lr_0 = 8.0911e-04
Loss = 5.0576e-02, PNorm = 103.9624, GNorm = 0.7889, lr_0 = 8.0855e-04
Loss = 5.2266e-02, PNorm = 104.0313, GNorm = 0.7322, lr_0 = 8.0800e-04
Loss = 3.9749e-02, PNorm = 104.0962, GNorm = 0.4794, lr_0 = 8.0745e-04
Loss = 5.2259e-02, PNorm = 104.1616, GNorm = 0.4721, lr_0 = 8.0689e-04
Loss = 5.5811e-02, PNorm = 104.2271, GNorm = 0.4962, lr_0 = 8.0634e-04
Loss = 4.9563e-02, PNorm = 104.3021, GNorm = 0.3719, lr_0 = 8.0579e-04
Loss = 5.1017e-02, PNorm = 104.3795, GNorm = 0.5742, lr_0 = 8.0523e-04
Loss = 4.8169e-02, PNorm = 104.4509, GNorm = 0.6747, lr_0 = 8.0468e-04
Loss = 5.6315e-02, PNorm = 104.5231, GNorm = 0.5510, lr_0 = 8.0413e-04
Loss = 4.5670e-02, PNorm = 104.5997, GNorm = 0.4887, lr_0 = 8.0358e-04
Loss = 5.3955e-02, PNorm = 104.6764, GNorm = 0.3144, lr_0 = 8.0303e-04
Loss = 4.2899e-02, PNorm = 104.7454, GNorm = 0.2419, lr_0 = 8.0248e-04
Loss = 5.1174e-02, PNorm = 104.8173, GNorm = 0.3321, lr_0 = 8.0193e-04
Loss = 5.3242e-02, PNorm = 104.8857, GNorm = 1.0710, lr_0 = 8.0138e-04
Loss = 5.2910e-02, PNorm = 104.9628, GNorm = 0.4035, lr_0 = 8.0083e-04
Loss = 5.4334e-02, PNorm = 105.0415, GNorm = 0.7128, lr_0 = 8.0028e-04
Loss = 4.8436e-02, PNorm = 105.1192, GNorm = 0.3801, lr_0 = 7.9974e-04
Loss = 5.2395e-02, PNorm = 105.1991, GNorm = 0.4923, lr_0 = 7.9919e-04
Loss = 5.4656e-02, PNorm = 105.2729, GNorm = 0.6425, lr_0 = 7.9864e-04
Loss = 4.4251e-02, PNorm = 105.3480, GNorm = 0.2432, lr_0 = 7.9809e-04
Loss = 4.9187e-02, PNorm = 105.4238, GNorm = 0.4445, lr_0 = 7.9755e-04
Loss = 5.4607e-02, PNorm = 105.4968, GNorm = 0.3313, lr_0 = 7.9700e-04
Loss = 4.7452e-02, PNorm = 105.5781, GNorm = 0.5175, lr_0 = 7.9645e-04
Loss = 5.9781e-02, PNorm = 105.6567, GNorm = 0.4395, lr_0 = 7.9591e-04
Loss = 5.2713e-02, PNorm = 105.7376, GNorm = 0.3153, lr_0 = 7.9536e-04
Loss = 5.5315e-02, PNorm = 105.8112, GNorm = 0.3197, lr_0 = 7.9482e-04
Loss = 5.9376e-02, PNorm = 105.8929, GNorm = 0.6626, lr_0 = 7.9427e-04
Loss = 4.9244e-02, PNorm = 105.9846, GNorm = 0.5964, lr_0 = 7.9373e-04
Loss = 5.7714e-02, PNorm = 106.0703, GNorm = 0.3157, lr_0 = 7.9319e-04
Loss = 6.2320e-02, PNorm = 106.1505, GNorm = 0.8350, lr_0 = 7.9264e-04
Loss = 6.1173e-02, PNorm = 106.2442, GNorm = 0.3111, lr_0 = 7.9210e-04
Loss = 6.2075e-02, PNorm = 106.3385, GNorm = 0.8210, lr_0 = 7.9156e-04
Loss = 5.8174e-02, PNorm = 106.4325, GNorm = 0.2976, lr_0 = 7.9101e-04
Loss = 5.3511e-02, PNorm = 106.5137, GNorm = 0.5133, lr_0 = 7.9047e-04
Loss = 6.1080e-02, PNorm = 106.5905, GNorm = 0.7484, lr_0 = 7.8993e-04
Loss = 6.1560e-02, PNorm = 106.6627, GNorm = 0.3974, lr_0 = 7.8939e-04
Loss = 5.5396e-02, PNorm = 106.7457, GNorm = 0.3229, lr_0 = 7.8885e-04
Loss = 5.9213e-02, PNorm = 106.8246, GNorm = 0.4514, lr_0 = 7.8831e-04
Loss = 4.8598e-02, PNorm = 106.9051, GNorm = 0.3890, lr_0 = 7.8777e-04
Loss = 5.2243e-02, PNorm = 106.9795, GNorm = 0.7270, lr_0 = 7.8723e-04
Loss = 5.7975e-02, PNorm = 107.0543, GNorm = 0.7363, lr_0 = 7.8669e-04
Loss = 5.8169e-02, PNorm = 107.1252, GNorm = 0.5306, lr_0 = 7.8615e-04
Loss = 6.3936e-02, PNorm = 107.1999, GNorm = 0.8137, lr_0 = 7.8561e-04
Loss = 4.9878e-02, PNorm = 107.2742, GNorm = 0.3998, lr_0 = 7.8507e-04
Loss = 6.0839e-02, PNorm = 107.3553, GNorm = 0.7272, lr_0 = 7.8454e-04
Loss = 6.0183e-02, PNorm = 107.4415, GNorm = 0.3466, lr_0 = 7.8400e-04
Loss = 5.4307e-02, PNorm = 107.5328, GNorm = 0.5824, lr_0 = 7.8346e-04
Loss = 5.6517e-02, PNorm = 107.6080, GNorm = 0.4845, lr_0 = 7.8293e-04
Loss = 5.9352e-02, PNorm = 107.6956, GNorm = 0.9916, lr_0 = 7.8239e-04
Loss = 5.0422e-02, PNorm = 107.7729, GNorm = 0.3000, lr_0 = 7.8185e-04
Loss = 5.3149e-02, PNorm = 107.8543, GNorm = 1.0367, lr_0 = 7.8132e-04
Validation mae = 0.497870
Epoch 5
Loss = 3.9201e-02, PNorm = 107.9249, GNorm = 0.2073, lr_0 = 7.8078e-04
Loss = 4.1074e-02, PNorm = 107.9811, GNorm = 0.2283, lr_0 = 7.8025e-04
Loss = 3.5458e-02, PNorm = 108.0324, GNorm = 0.3848, lr_0 = 7.7971e-04
Loss = 4.0464e-02, PNorm = 108.0847, GNorm = 0.3015, lr_0 = 7.7918e-04
Loss = 3.4655e-02, PNorm = 108.1421, GNorm = 0.3125, lr_0 = 7.7864e-04
Loss = 3.2225e-02, PNorm = 108.1912, GNorm = 0.2964, lr_0 = 7.7811e-04
Loss = 3.8492e-02, PNorm = 108.2379, GNorm = 0.2465, lr_0 = 7.7758e-04
Loss = 3.4039e-02, PNorm = 108.2867, GNorm = 0.3434, lr_0 = 7.7705e-04
Loss = 3.6940e-02, PNorm = 108.3351, GNorm = 0.6878, lr_0 = 7.7651e-04
Loss = 3.3970e-02, PNorm = 108.3930, GNorm = 0.4931, lr_0 = 7.7598e-04
Loss = 3.7290e-02, PNorm = 108.4385, GNorm = 0.5007, lr_0 = 7.7545e-04
Loss = 3.8440e-02, PNorm = 108.4874, GNorm = 0.3536, lr_0 = 7.7492e-04
Loss = 4.1423e-02, PNorm = 108.5291, GNorm = 0.4781, lr_0 = 7.7439e-04
Loss = 3.4515e-02, PNorm = 108.5833, GNorm = 0.4259, lr_0 = 7.7386e-04
Loss = 4.0939e-02, PNorm = 108.6405, GNorm = 1.3662, lr_0 = 7.7333e-04
Loss = 3.2108e-02, PNorm = 108.6917, GNorm = 0.5766, lr_0 = 7.7280e-04
Loss = 3.3535e-02, PNorm = 108.7443, GNorm = 0.5669, lr_0 = 7.7227e-04
Loss = 2.9982e-02, PNorm = 108.7881, GNorm = 0.2315, lr_0 = 7.7174e-04
Loss = 2.9108e-02, PNorm = 108.8324, GNorm = 0.1846, lr_0 = 7.7121e-04
Loss = 3.7785e-02, PNorm = 108.8783, GNorm = 0.2807, lr_0 = 7.7068e-04
Loss = 3.7233e-02, PNorm = 108.9270, GNorm = 0.4158, lr_0 = 7.7015e-04
Loss = 3.7564e-02, PNorm = 108.9864, GNorm = 0.8939, lr_0 = 7.6963e-04
Loss = 3.3340e-02, PNorm = 109.0469, GNorm = 0.3266, lr_0 = 7.6910e-04
Loss = 3.0766e-02, PNorm = 109.0998, GNorm = 0.2290, lr_0 = 7.6857e-04
Loss = 3.3558e-02, PNorm = 109.1453, GNorm = 0.5063, lr_0 = 7.6805e-04
Loss = 3.9156e-02, PNorm = 109.1893, GNorm = 0.2431, lr_0 = 7.6752e-04
Loss = 3.0537e-02, PNorm = 109.2414, GNorm = 0.4600, lr_0 = 7.6699e-04
Loss = 3.3129e-02, PNorm = 109.2883, GNorm = 0.3561, lr_0 = 7.6647e-04
Loss = 3.8125e-02, PNorm = 109.3432, GNorm = 0.5420, lr_0 = 7.6594e-04
Loss = 3.8291e-02, PNorm = 109.3999, GNorm = 0.5015, lr_0 = 7.6542e-04
Loss = 3.0553e-02, PNorm = 109.4558, GNorm = 0.1719, lr_0 = 7.6489e-04
Loss = 3.3706e-02, PNorm = 109.5083, GNorm = 0.5698, lr_0 = 7.6437e-04
Loss = 2.9910e-02, PNorm = 109.5719, GNorm = 0.2950, lr_0 = 7.6385e-04
Loss = 3.5261e-02, PNorm = 109.6183, GNorm = 0.5688, lr_0 = 7.6332e-04
Loss = 3.1006e-02, PNorm = 109.6615, GNorm = 0.2357, lr_0 = 7.6280e-04
Loss = 3.6067e-02, PNorm = 109.7116, GNorm = 0.9498, lr_0 = 7.6228e-04
Loss = 3.7450e-02, PNorm = 109.7707, GNorm = 0.5929, lr_0 = 7.6176e-04
Loss = 3.7027e-02, PNorm = 109.8258, GNorm = 0.5397, lr_0 = 7.6123e-04
Loss = 3.2197e-02, PNorm = 109.8700, GNorm = 0.3698, lr_0 = 7.6071e-04
Loss = 3.6271e-02, PNorm = 109.9185, GNorm = 0.6097, lr_0 = 7.6019e-04
Loss = 3.3467e-02, PNorm = 109.9729, GNorm = 0.3728, lr_0 = 7.5967e-04
Loss = 3.5431e-02, PNorm = 110.0184, GNorm = 0.2072, lr_0 = 7.5915e-04
Loss = 3.3374e-02, PNorm = 110.0672, GNorm = 0.6666, lr_0 = 7.5863e-04
Loss = 3.4137e-02, PNorm = 110.1206, GNorm = 0.4651, lr_0 = 7.5811e-04
Loss = 3.5455e-02, PNorm = 110.1693, GNorm = 0.4734, lr_0 = 7.5759e-04
Loss = 3.1499e-02, PNorm = 110.2224, GNorm = 0.6455, lr_0 = 7.5707e-04
Loss = 3.2280e-02, PNorm = 110.2740, GNorm = 0.2420, lr_0 = 7.5655e-04
Loss = 3.6547e-02, PNorm = 110.3210, GNorm = 0.4481, lr_0 = 7.5603e-04
Loss = 4.0164e-02, PNorm = 110.3787, GNorm = 0.4334, lr_0 = 7.5552e-04
Loss = 3.4418e-02, PNorm = 110.4425, GNorm = 0.3892, lr_0 = 7.5500e-04
Loss = 3.8833e-02, PNorm = 110.5041, GNorm = 0.9993, lr_0 = 7.5448e-04
Loss = 3.1879e-02, PNorm = 110.5618, GNorm = 0.5556, lr_0 = 7.5397e-04
Loss = 3.6541e-02, PNorm = 110.6253, GNorm = 0.3070, lr_0 = 7.5345e-04
Loss = 4.0655e-02, PNorm = 110.6823, GNorm = 0.3251, lr_0 = 7.5293e-04
Loss = 3.5146e-02, PNorm = 110.7368, GNorm = 0.7793, lr_0 = 7.5242e-04
Loss = 3.7954e-02, PNorm = 110.8032, GNorm = 0.6820, lr_0 = 7.5190e-04
Loss = 4.0979e-02, PNorm = 110.8616, GNorm = 0.3811, lr_0 = 7.5139e-04
Loss = 3.5374e-02, PNorm = 110.9269, GNorm = 0.5207, lr_0 = 7.5087e-04
Loss = 3.6442e-02, PNorm = 110.9854, GNorm = 0.6119, lr_0 = 7.5036e-04
Loss = 3.8738e-02, PNorm = 111.0458, GNorm = 0.4990, lr_0 = 7.4984e-04
Loss = 3.7025e-02, PNorm = 111.1048, GNorm = 0.6262, lr_0 = 7.4933e-04
Loss = 3.5683e-02, PNorm = 111.1674, GNorm = 0.5240, lr_0 = 7.4882e-04
Loss = 3.8130e-02, PNorm = 111.2250, GNorm = 0.4175, lr_0 = 7.4830e-04
Loss = 3.2731e-02, PNorm = 111.2889, GNorm = 0.6544, lr_0 = 7.4779e-04
Loss = 3.8006e-02, PNorm = 111.3474, GNorm = 0.4249, lr_0 = 7.4728e-04
Loss = 3.7991e-02, PNorm = 111.4172, GNorm = 0.3832, lr_0 = 7.4677e-04
Loss = 3.9008e-02, PNorm = 111.4827, GNorm = 0.3631, lr_0 = 7.4625e-04
Loss = 4.1566e-02, PNorm = 111.5463, GNorm = 0.6016, lr_0 = 7.4574e-04
Loss = 3.7219e-02, PNorm = 111.6095, GNorm = 0.6404, lr_0 = 7.4523e-04
Loss = 4.1086e-02, PNorm = 111.6755, GNorm = 0.4907, lr_0 = 7.4472e-04
Loss = 3.8850e-02, PNorm = 111.7371, GNorm = 0.5397, lr_0 = 7.4421e-04
Loss = 4.0088e-02, PNorm = 111.8056, GNorm = 0.5568, lr_0 = 7.4370e-04
Loss = 3.7803e-02, PNorm = 111.8678, GNorm = 0.3803, lr_0 = 7.4319e-04
Loss = 4.1452e-02, PNorm = 111.9393, GNorm = 0.9094, lr_0 = 7.4268e-04
Loss = 3.7292e-02, PNorm = 112.0036, GNorm = 0.6321, lr_0 = 7.4217e-04
Loss = 3.7663e-02, PNorm = 112.0702, GNorm = 0.4048, lr_0 = 7.4167e-04
Loss = 4.2394e-02, PNorm = 112.1311, GNorm = 0.7600, lr_0 = 7.4116e-04
Loss = 3.9753e-02, PNorm = 112.2037, GNorm = 0.6617, lr_0 = 7.4065e-04
Loss = 3.8823e-02, PNorm = 112.2672, GNorm = 0.2721, lr_0 = 7.4014e-04
Loss = 4.1134e-02, PNorm = 112.3369, GNorm = 0.6663, lr_0 = 7.3964e-04
Loss = 3.2350e-02, PNorm = 112.4037, GNorm = 0.3049, lr_0 = 7.3913e-04
Loss = 3.9137e-02, PNorm = 112.4698, GNorm = 0.2980, lr_0 = 7.3862e-04
Loss = 3.7261e-02, PNorm = 112.5398, GNorm = 0.3345, lr_0 = 7.3812e-04
Loss = 3.4164e-02, PNorm = 112.6092, GNorm = 0.3562, lr_0 = 7.3761e-04
Loss = 3.8535e-02, PNorm = 112.6790, GNorm = 0.3795, lr_0 = 7.3711e-04
Loss = 3.6355e-02, PNorm = 112.7464, GNorm = 0.5295, lr_0 = 7.3660e-04
Loss = 4.0285e-02, PNorm = 112.8165, GNorm = 0.5743, lr_0 = 7.3610e-04
Loss = 3.9844e-02, PNorm = 112.8881, GNorm = 0.4546, lr_0 = 7.3559e-04
Loss = 3.9118e-02, PNorm = 112.9551, GNorm = 0.7425, lr_0 = 7.3509e-04
Loss = 3.8997e-02, PNorm = 113.0336, GNorm = 0.3308, lr_0 = 7.3458e-04
Loss = 3.5273e-02, PNorm = 113.1060, GNorm = 1.0523, lr_0 = 7.3408e-04
Loss = 4.2961e-02, PNorm = 113.1812, GNorm = 1.0982, lr_0 = 7.3358e-04
Loss = 3.5980e-02, PNorm = 113.2431, GNorm = 0.5361, lr_0 = 7.3308e-04
Loss = 4.3071e-02, PNorm = 113.3160, GNorm = 0.5824, lr_0 = 7.3257e-04
Loss = 4.1619e-02, PNorm = 113.3834, GNorm = 0.7251, lr_0 = 7.3207e-04
Loss = 4.7451e-02, PNorm = 113.4623, GNorm = 0.4013, lr_0 = 7.3157e-04
Loss = 3.9875e-02, PNorm = 113.5427, GNorm = 0.3618, lr_0 = 7.3107e-04
Loss = 3.7967e-02, PNorm = 113.6215, GNorm = 0.4525, lr_0 = 7.3057e-04
Loss = 4.2662e-02, PNorm = 113.6924, GNorm = 0.8007, lr_0 = 7.3007e-04
Loss = 4.5300e-02, PNorm = 113.7674, GNorm = 0.2888, lr_0 = 7.2957e-04
Loss = 4.4073e-02, PNorm = 113.8470, GNorm = 0.5808, lr_0 = 7.2907e-04
Loss = 4.1970e-02, PNorm = 113.9277, GNorm = 0.2860, lr_0 = 7.2857e-04
Loss = 4.4746e-02, PNorm = 113.9917, GNorm = 0.2426, lr_0 = 7.2807e-04
Loss = 3.5864e-02, PNorm = 114.0599, GNorm = 0.3958, lr_0 = 7.2757e-04
Loss = 4.1373e-02, PNorm = 114.1262, GNorm = 0.2740, lr_0 = 7.2707e-04
Loss = 3.6704e-02, PNorm = 114.1972, GNorm = 0.3327, lr_0 = 7.2657e-04
Loss = 3.8231e-02, PNorm = 114.2603, GNorm = 1.3659, lr_0 = 7.2608e-04
Loss = 3.9854e-02, PNorm = 114.3343, GNorm = 0.6692, lr_0 = 7.2558e-04
Loss = 3.6175e-02, PNorm = 114.4016, GNorm = 0.3606, lr_0 = 7.2508e-04
Loss = 3.8217e-02, PNorm = 114.4654, GNorm = 0.6713, lr_0 = 7.2458e-04
Loss = 3.7609e-02, PNorm = 114.5250, GNorm = 0.3260, lr_0 = 7.2409e-04
Loss = 3.8693e-02, PNorm = 114.5881, GNorm = 0.3294, lr_0 = 7.2359e-04
Loss = 3.6898e-02, PNorm = 114.6559, GNorm = 0.6181, lr_0 = 7.2310e-04
Loss = 3.9761e-02, PNorm = 114.7252, GNorm = 0.4130, lr_0 = 7.2260e-04
Loss = 3.7575e-02, PNorm = 114.7931, GNorm = 0.5525, lr_0 = 7.2211e-04
Loss = 3.7226e-02, PNorm = 114.8682, GNorm = 0.4006, lr_0 = 7.2161e-04
Loss = 4.1084e-02, PNorm = 114.9402, GNorm = 0.3728, lr_0 = 7.2112e-04
Loss = 3.5230e-02, PNorm = 115.0111, GNorm = 0.3718, lr_0 = 7.2062e-04
Loss = 4.4386e-02, PNorm = 115.0755, GNorm = 0.4139, lr_0 = 7.2013e-04
Loss = 4.2993e-02, PNorm = 115.1457, GNorm = 0.7208, lr_0 = 7.1964e-04
Validation mae = 0.492690
Epoch 6
Loss = 3.0644e-02, PNorm = 115.2060, GNorm = 0.3354, lr_0 = 7.1914e-04
Loss = 3.4277e-02, PNorm = 115.2586, GNorm = 0.2110, lr_0 = 7.1865e-04
Loss = 2.8464e-02, PNorm = 115.3000, GNorm = 0.7004, lr_0 = 7.1816e-04
Loss = 2.5153e-02, PNorm = 115.3469, GNorm = 0.2413, lr_0 = 7.1767e-04
Loss = 3.4185e-02, PNorm = 115.3889, GNorm = 0.3656, lr_0 = 7.1717e-04
Loss = 3.0206e-02, PNorm = 115.4289, GNorm = 0.2344, lr_0 = 7.1668e-04
Loss = 3.5323e-02, PNorm = 115.4732, GNorm = 0.2387, lr_0 = 7.1619e-04
Loss = 3.3086e-02, PNorm = 115.5210, GNorm = 0.3940, lr_0 = 7.1570e-04
Loss = 3.1733e-02, PNorm = 115.5617, GNorm = 0.2910, lr_0 = 7.1521e-04
Loss = 2.8754e-02, PNorm = 115.6080, GNorm = 0.2263, lr_0 = 7.1472e-04
Loss = 2.7149e-02, PNorm = 115.6532, GNorm = 0.2364, lr_0 = 7.1423e-04
Loss = 2.5253e-02, PNorm = 115.6978, GNorm = 0.2942, lr_0 = 7.1374e-04
Loss = 2.6746e-02, PNorm = 115.7429, GNorm = 0.4678, lr_0 = 7.1325e-04
Loss = 2.9445e-02, PNorm = 115.7876, GNorm = 0.2506, lr_0 = 7.1277e-04
Loss = 2.3153e-02, PNorm = 115.8364, GNorm = 0.4502, lr_0 = 7.1228e-04
Loss = 3.0015e-02, PNorm = 115.8808, GNorm = 0.3040, lr_0 = 7.1179e-04
Loss = 3.2526e-02, PNorm = 115.9229, GNorm = 0.8378, lr_0 = 7.1130e-04
Loss = 2.4011e-02, PNorm = 115.9654, GNorm = 0.3869, lr_0 = 7.1081e-04
Loss = 2.6293e-02, PNorm = 116.0039, GNorm = 0.3401, lr_0 = 7.1033e-04
Loss = 3.0385e-02, PNorm = 116.0476, GNorm = 0.6315, lr_0 = 7.0984e-04
Loss = 2.9313e-02, PNorm = 116.0913, GNorm = 0.2755, lr_0 = 7.0935e-04
Loss = 2.6736e-02, PNorm = 116.1382, GNorm = 0.3492, lr_0 = 7.0887e-04
Loss = 3.0307e-02, PNorm = 116.1778, GNorm = 1.0196, lr_0 = 7.0838e-04
Loss = 2.6444e-02, PNorm = 116.2280, GNorm = 0.2530, lr_0 = 7.0790e-04
Loss = 2.9990e-02, PNorm = 116.2747, GNorm = 0.4466, lr_0 = 7.0741e-04
Loss = 2.4586e-02, PNorm = 116.3193, GNorm = 0.3096, lr_0 = 7.0693e-04
Loss = 2.6336e-02, PNorm = 116.3589, GNorm = 0.3782, lr_0 = 7.0644e-04
Loss = 3.2403e-02, PNorm = 116.4036, GNorm = 0.3064, lr_0 = 7.0596e-04
Loss = 2.7781e-02, PNorm = 116.4473, GNorm = 0.3998, lr_0 = 7.0548e-04
Loss = 2.8836e-02, PNorm = 116.4971, GNorm = 0.5858, lr_0 = 7.0499e-04
Loss = 2.5815e-02, PNorm = 116.5487, GNorm = 0.6710, lr_0 = 7.0451e-04
Loss = 2.9329e-02, PNorm = 116.5921, GNorm = 0.7196, lr_0 = 7.0403e-04
Loss = 2.3696e-02, PNorm = 116.6375, GNorm = 0.3230, lr_0 = 7.0354e-04
Loss = 2.8883e-02, PNorm = 116.6869, GNorm = 0.4840, lr_0 = 7.0306e-04
Loss = 2.9809e-02, PNorm = 116.7403, GNorm = 0.3034, lr_0 = 7.0258e-04
Loss = 3.0967e-02, PNorm = 116.7885, GNorm = 1.1308, lr_0 = 7.0210e-04
Loss = 2.6144e-02, PNorm = 116.8345, GNorm = 0.2435, lr_0 = 7.0162e-04
Loss = 3.0231e-02, PNorm = 116.8872, GNorm = 0.6199, lr_0 = 7.0114e-04
Loss = 2.7971e-02, PNorm = 116.9408, GNorm = 0.4508, lr_0 = 7.0066e-04
Loss = 2.9285e-02, PNorm = 116.9967, GNorm = 0.2018, lr_0 = 7.0018e-04
Loss = 2.4853e-02, PNorm = 117.0474, GNorm = 0.2262, lr_0 = 6.9970e-04
Loss = 2.7366e-02, PNorm = 117.0887, GNorm = 0.6429, lr_0 = 6.9922e-04
Loss = 2.9286e-02, PNorm = 117.1303, GNorm = 0.3768, lr_0 = 6.9874e-04
Loss = 2.5854e-02, PNorm = 117.1813, GNorm = 0.3034, lr_0 = 6.9826e-04
Loss = 2.7754e-02, PNorm = 117.2336, GNorm = 0.2141, lr_0 = 6.9778e-04
Loss = 2.9826e-02, PNorm = 117.2908, GNorm = 0.4458, lr_0 = 6.9730e-04
Loss = 2.7930e-02, PNorm = 117.3362, GNorm = 0.3914, lr_0 = 6.9683e-04
Loss = 2.9008e-02, PNorm = 117.3819, GNorm = 0.7207, lr_0 = 6.9635e-04
Loss = 2.5948e-02, PNorm = 117.4286, GNorm = 0.1881, lr_0 = 6.9587e-04
Loss = 2.6360e-02, PNorm = 117.4785, GNorm = 0.2166, lr_0 = 6.9540e-04
Loss = 2.9352e-02, PNorm = 117.5264, GNorm = 0.3454, lr_0 = 6.9492e-04
Loss = 2.8893e-02, PNorm = 117.5785, GNorm = 0.2472, lr_0 = 6.9444e-04
Loss = 2.6634e-02, PNorm = 117.6364, GNorm = 0.1675, lr_0 = 6.9397e-04
Loss = 2.8493e-02, PNorm = 117.6935, GNorm = 0.2469, lr_0 = 6.9349e-04
Loss = 2.3052e-02, PNorm = 117.7465, GNorm = 0.9607, lr_0 = 6.9302e-04
Loss = 2.4357e-02, PNorm = 117.7970, GNorm = 0.5548, lr_0 = 6.9254e-04
Loss = 2.9880e-02, PNorm = 117.8381, GNorm = 0.6159, lr_0 = 6.9207e-04
Loss = 2.8998e-02, PNorm = 117.8897, GNorm = 0.6488, lr_0 = 6.9159e-04
Loss = 3.1427e-02, PNorm = 117.9471, GNorm = 0.4296, lr_0 = 6.9112e-04
Loss = 2.9043e-02, PNorm = 118.0098, GNorm = 0.7347, lr_0 = 6.9065e-04
Loss = 2.6295e-02, PNorm = 118.0585, GNorm = 0.5145, lr_0 = 6.9017e-04
Loss = 2.6339e-02, PNorm = 118.1130, GNorm = 0.3953, lr_0 = 6.8970e-04
Loss = 3.1691e-02, PNorm = 118.1681, GNorm = 0.3263, lr_0 = 6.8923e-04
Loss = 2.6703e-02, PNorm = 118.2202, GNorm = 0.3949, lr_0 = 6.8876e-04
Loss = 2.6471e-02, PNorm = 118.2693, GNorm = 0.4168, lr_0 = 6.8828e-04
Loss = 2.8327e-02, PNorm = 118.3242, GNorm = 0.7064, lr_0 = 6.8781e-04
Loss = 2.7939e-02, PNorm = 118.3785, GNorm = 0.5057, lr_0 = 6.8734e-04
Loss = 2.9474e-02, PNorm = 118.4319, GNorm = 0.6946, lr_0 = 6.8687e-04
Loss = 3.0190e-02, PNorm = 118.4777, GNorm = 0.4772, lr_0 = 6.8640e-04
Loss = 2.6633e-02, PNorm = 118.5357, GNorm = 0.4008, lr_0 = 6.8593e-04
Loss = 2.7451e-02, PNorm = 118.5864, GNorm = 0.6034, lr_0 = 6.8546e-04
Loss = 2.9565e-02, PNorm = 118.6384, GNorm = 0.4243, lr_0 = 6.8499e-04
Loss = 2.7261e-02, PNorm = 118.6829, GNorm = 0.4513, lr_0 = 6.8452e-04
Loss = 3.3461e-02, PNorm = 118.7415, GNorm = 0.4233, lr_0 = 6.8405e-04
Loss = 2.8224e-02, PNorm = 118.8065, GNorm = 0.2834, lr_0 = 6.8358e-04
Loss = 2.9728e-02, PNorm = 118.8714, GNorm = 0.3994, lr_0 = 6.8312e-04
Loss = 2.7124e-02, PNorm = 118.9305, GNorm = 0.4721, lr_0 = 6.8265e-04
Loss = 2.8359e-02, PNorm = 118.9834, GNorm = 0.4621, lr_0 = 6.8218e-04
Loss = 3.1020e-02, PNorm = 119.0421, GNorm = 0.4303, lr_0 = 6.8171e-04
Loss = 2.6837e-02, PNorm = 119.0956, GNorm = 0.1944, lr_0 = 6.8125e-04
Loss = 2.7781e-02, PNorm = 119.1520, GNorm = 0.3551, lr_0 = 6.8078e-04
Loss = 2.4406e-02, PNorm = 119.1988, GNorm = 0.2562, lr_0 = 6.8031e-04
Loss = 2.8721e-02, PNorm = 119.2504, GNorm = 0.3811, lr_0 = 6.7985e-04
Loss = 2.7844e-02, PNorm = 119.3016, GNorm = 0.5968, lr_0 = 6.7938e-04
Loss = 2.9421e-02, PNorm = 119.3622, GNorm = 0.2624, lr_0 = 6.7892e-04
Loss = 2.9381e-02, PNorm = 119.4215, GNorm = 0.2176, lr_0 = 6.7845e-04
Loss = 3.1626e-02, PNorm = 119.4831, GNorm = 0.4737, lr_0 = 6.7799e-04
Loss = 3.1416e-02, PNorm = 119.5416, GNorm = 0.2208, lr_0 = 6.7752e-04
Loss = 2.5103e-02, PNorm = 119.5969, GNorm = 0.5359, lr_0 = 6.7706e-04
Loss = 2.9084e-02, PNorm = 119.6497, GNorm = 0.4149, lr_0 = 6.7659e-04
Loss = 2.6465e-02, PNorm = 119.6995, GNorm = 0.4275, lr_0 = 6.7613e-04
Loss = 2.4383e-02, PNorm = 119.7506, GNorm = 0.3773, lr_0 = 6.7567e-04
Loss = 2.6531e-02, PNorm = 119.8008, GNorm = 0.2459, lr_0 = 6.7520e-04
Loss = 2.9371e-02, PNorm = 119.8509, GNorm = 0.4087, lr_0 = 6.7474e-04
Loss = 3.0765e-02, PNorm = 119.8995, GNorm = 0.4323, lr_0 = 6.7428e-04
Loss = 2.9182e-02, PNorm = 119.9554, GNorm = 0.2747, lr_0 = 6.7382e-04
Loss = 3.2359e-02, PNorm = 120.0121, GNorm = 0.5329, lr_0 = 6.7335e-04
Loss = 3.1286e-02, PNorm = 120.0757, GNorm = 0.8242, lr_0 = 6.7289e-04
Loss = 2.7010e-02, PNorm = 120.1275, GNorm = 0.2650, lr_0 = 6.7243e-04
Loss = 2.6417e-02, PNorm = 120.1755, GNorm = 0.2510, lr_0 = 6.7197e-04
Loss = 3.0529e-02, PNorm = 120.2288, GNorm = 0.7900, lr_0 = 6.7151e-04
Loss = 2.4868e-02, PNorm = 120.2867, GNorm = 0.3384, lr_0 = 6.7105e-04
Loss = 3.2949e-02, PNorm = 120.3421, GNorm = 0.3151, lr_0 = 6.7059e-04
Loss = 2.9870e-02, PNorm = 120.4102, GNorm = 0.2841, lr_0 = 6.7013e-04
Loss = 3.0039e-02, PNorm = 120.4773, GNorm = 0.2227, lr_0 = 6.6967e-04
Loss = 2.9716e-02, PNorm = 120.5356, GNorm = 0.2427, lr_0 = 6.6921e-04
Loss = 2.7156e-02, PNorm = 120.5893, GNorm = 0.5118, lr_0 = 6.6876e-04
Loss = 3.3097e-02, PNorm = 120.6453, GNorm = 0.6941, lr_0 = 6.6830e-04
Loss = 3.1832e-02, PNorm = 120.7015, GNorm = 0.3679, lr_0 = 6.6784e-04
Loss = 3.2903e-02, PNorm = 120.7602, GNorm = 0.5540, lr_0 = 6.6738e-04
Loss = 2.8575e-02, PNorm = 120.8205, GNorm = 0.4025, lr_0 = 6.6693e-04
Loss = 3.1655e-02, PNorm = 120.8835, GNorm = 0.5162, lr_0 = 6.6647e-04
Loss = 3.1064e-02, PNorm = 120.9428, GNorm = 0.6843, lr_0 = 6.6601e-04
Loss = 2.8728e-02, PNorm = 121.0098, GNorm = 0.3189, lr_0 = 6.6556e-04
Loss = 2.8151e-02, PNorm = 121.0679, GNorm = 0.3481, lr_0 = 6.6510e-04
Loss = 3.7446e-02, PNorm = 121.1333, GNorm = 0.4234, lr_0 = 6.6464e-04
Loss = 3.1671e-02, PNorm = 121.1925, GNorm = 0.3200, lr_0 = 6.6419e-04
Loss = 3.2303e-02, PNorm = 121.2533, GNorm = 0.3003, lr_0 = 6.6373e-04
Loss = 3.0743e-02, PNorm = 121.3136, GNorm = 0.5637, lr_0 = 6.6328e-04
Loss = 2.9212e-02, PNorm = 121.3788, GNorm = 0.2347, lr_0 = 6.6282e-04
Validation mae = 0.489552
Epoch 7
Loss = 2.7238e-02, PNorm = 121.4335, GNorm = 0.2352, lr_0 = 6.6237e-04
Loss = 2.4330e-02, PNorm = 121.4839, GNorm = 0.1764, lr_0 = 6.6192e-04
Loss = 2.3625e-02, PNorm = 121.5247, GNorm = 0.2592, lr_0 = 6.6146e-04
Loss = 2.2541e-02, PNorm = 121.5588, GNorm = 0.5554, lr_0 = 6.6101e-04
Loss = 2.6797e-02, PNorm = 121.5906, GNorm = 0.4565, lr_0 = 6.6056e-04
Loss = 1.8598e-02, PNorm = 121.6288, GNorm = 0.2234, lr_0 = 6.6011e-04
Loss = 2.3279e-02, PNorm = 121.6678, GNorm = 0.3620, lr_0 = 6.5965e-04
Loss = 2.2060e-02, PNorm = 121.7095, GNorm = 0.1846, lr_0 = 6.5920e-04
Loss = 2.0517e-02, PNorm = 121.7447, GNorm = 0.2098, lr_0 = 6.5875e-04
Loss = 2.4250e-02, PNorm = 121.7809, GNorm = 0.5186, lr_0 = 6.5830e-04
Loss = 2.4318e-02, PNorm = 121.8242, GNorm = 0.5777, lr_0 = 6.5785e-04
Loss = 2.8600e-02, PNorm = 121.8679, GNorm = 0.4982, lr_0 = 6.5740e-04
Loss = 2.2641e-02, PNorm = 121.9106, GNorm = 0.2657, lr_0 = 6.5695e-04
Loss = 2.1220e-02, PNorm = 121.9517, GNorm = 0.1829, lr_0 = 6.5650e-04
Loss = 2.1975e-02, PNorm = 121.9909, GNorm = 0.2828, lr_0 = 6.5605e-04
Loss = 2.5204e-02, PNorm = 122.0325, GNorm = 0.5981, lr_0 = 6.5560e-04
Loss = 2.5408e-02, PNorm = 122.0657, GNorm = 0.3338, lr_0 = 6.5515e-04
Loss = 2.3169e-02, PNorm = 122.1092, GNorm = 0.1996, lr_0 = 6.5470e-04
Loss = 2.3929e-02, PNorm = 122.1585, GNorm = 0.7406, lr_0 = 6.5425e-04
Loss = 2.2554e-02, PNorm = 122.2115, GNorm = 0.2321, lr_0 = 6.5380e-04
Loss = 2.2963e-02, PNorm = 122.2553, GNorm = 0.1760, lr_0 = 6.5335e-04
Loss = 2.0856e-02, PNorm = 122.3041, GNorm = 0.2271, lr_0 = 6.5291e-04
Loss = 2.4356e-02, PNorm = 122.3463, GNorm = 0.9785, lr_0 = 6.5246e-04
Loss = 2.0310e-02, PNorm = 122.3938, GNorm = 0.7188, lr_0 = 6.5201e-04
Loss = 2.8462e-02, PNorm = 122.4363, GNorm = 0.5903, lr_0 = 6.5157e-04
Loss = 3.1062e-02, PNorm = 122.4845, GNorm = 0.3605, lr_0 = 6.5112e-04
Loss = 2.1064e-02, PNorm = 122.5343, GNorm = 0.5041, lr_0 = 6.5067e-04
Loss = 2.1604e-02, PNorm = 122.5724, GNorm = 0.7346, lr_0 = 6.5023e-04
Loss = 2.3319e-02, PNorm = 122.6135, GNorm = 0.3487, lr_0 = 6.4978e-04
Loss = 2.7139e-02, PNorm = 122.6587, GNorm = 0.3303, lr_0 = 6.4934e-04
Loss = 2.2601e-02, PNorm = 122.7064, GNorm = 0.3246, lr_0 = 6.4889e-04
Loss = 2.0314e-02, PNorm = 122.7517, GNorm = 0.4416, lr_0 = 6.4845e-04
Loss = 2.4124e-02, PNorm = 122.7922, GNorm = 0.5168, lr_0 = 6.4800e-04
Loss = 2.3329e-02, PNorm = 122.8358, GNorm = 0.4689, lr_0 = 6.4756e-04
Loss = 2.1402e-02, PNorm = 122.8829, GNorm = 0.4230, lr_0 = 6.4712e-04
Loss = 2.4946e-02, PNorm = 122.9275, GNorm = 0.2211, lr_0 = 6.4667e-04
Loss = 2.2100e-02, PNorm = 122.9758, GNorm = 0.2747, lr_0 = 6.4623e-04
Loss = 2.0453e-02, PNorm = 123.0199, GNorm = 0.1957, lr_0 = 6.4579e-04
Loss = 2.5136e-02, PNorm = 123.0661, GNorm = 0.5375, lr_0 = 6.4534e-04
Loss = 2.2779e-02, PNorm = 123.1144, GNorm = 0.2535, lr_0 = 6.4490e-04
Loss = 2.3536e-02, PNorm = 123.1599, GNorm = 0.2655, lr_0 = 6.4446e-04
Loss = 2.3620e-02, PNorm = 123.1970, GNorm = 0.4319, lr_0 = 6.4402e-04
Loss = 2.3363e-02, PNorm = 123.2367, GNorm = 0.4065, lr_0 = 6.4358e-04
Loss = 2.0575e-02, PNorm = 123.2807, GNorm = 0.4581, lr_0 = 6.4314e-04
Loss = 2.0716e-02, PNorm = 123.3203, GNorm = 0.2356, lr_0 = 6.4270e-04
Loss = 2.0366e-02, PNorm = 123.3585, GNorm = 0.1804, lr_0 = 6.4226e-04
Loss = 2.0608e-02, PNorm = 123.3982, GNorm = 0.3006, lr_0 = 6.4182e-04
Loss = 2.2061e-02, PNorm = 123.4457, GNorm = 0.3394, lr_0 = 6.4138e-04
Loss = 2.4365e-02, PNorm = 123.4923, GNorm = 0.6140, lr_0 = 6.4094e-04
Loss = 2.2737e-02, PNorm = 123.5389, GNorm = 0.5690, lr_0 = 6.4050e-04
Loss = 2.2451e-02, PNorm = 123.5813, GNorm = 0.2903, lr_0 = 6.4006e-04
Loss = 2.1317e-02, PNorm = 123.6195, GNorm = 0.2901, lr_0 = 6.3962e-04
Loss = 2.3922e-02, PNorm = 123.6637, GNorm = 0.6215, lr_0 = 6.3918e-04
Loss = 2.3556e-02, PNorm = 123.7114, GNorm = 1.0642, lr_0 = 6.3874e-04
Loss = 2.2061e-02, PNorm = 123.7648, GNorm = 0.2610, lr_0 = 6.3831e-04
Loss = 2.1616e-02, PNorm = 123.8163, GNorm = 0.3034, lr_0 = 6.3787e-04
Loss = 2.0275e-02, PNorm = 123.8631, GNorm = 0.1613, lr_0 = 6.3743e-04
Loss = 2.2778e-02, PNorm = 123.9035, GNorm = 0.3602, lr_0 = 6.3700e-04
Loss = 2.3924e-02, PNorm = 123.9481, GNorm = 0.3256, lr_0 = 6.3656e-04
Loss = 2.5463e-02, PNorm = 123.9887, GNorm = 0.3605, lr_0 = 6.3612e-04
Loss = 2.0503e-02, PNorm = 124.0342, GNorm = 0.2992, lr_0 = 6.3569e-04
Loss = 1.9771e-02, PNorm = 124.0786, GNorm = 0.3003, lr_0 = 6.3525e-04
Loss = 2.1215e-02, PNorm = 124.1139, GNorm = 0.4086, lr_0 = 6.3482e-04
Loss = 1.7932e-02, PNorm = 124.1506, GNorm = 0.4301, lr_0 = 6.3438e-04
Loss = 2.0459e-02, PNorm = 124.1939, GNorm = 0.5578, lr_0 = 6.3395e-04
Loss = 1.9833e-02, PNorm = 124.2343, GNorm = 0.4234, lr_0 = 6.3351e-04
Loss = 2.1486e-02, PNorm = 124.2746, GNorm = 0.1885, lr_0 = 6.3308e-04
Loss = 1.9922e-02, PNorm = 124.3156, GNorm = 0.3724, lr_0 = 6.3265e-04
Loss = 2.3039e-02, PNorm = 124.3556, GNorm = 0.4225, lr_0 = 6.3221e-04
Loss = 2.1907e-02, PNorm = 124.3950, GNorm = 0.3538, lr_0 = 6.3178e-04
Loss = 2.0730e-02, PNorm = 124.4429, GNorm = 0.4222, lr_0 = 6.3135e-04
Loss = 2.1624e-02, PNorm = 124.4924, GNorm = 0.2495, lr_0 = 6.3091e-04
Loss = 2.3928e-02, PNorm = 124.5412, GNorm = 0.3485, lr_0 = 6.3048e-04
Loss = 2.0496e-02, PNorm = 124.5865, GNorm = 0.5134, lr_0 = 6.3005e-04
Loss = 2.1102e-02, PNorm = 124.6395, GNorm = 0.2481, lr_0 = 6.2962e-04
Loss = 2.4699e-02, PNorm = 124.6854, GNorm = 0.4543, lr_0 = 6.2919e-04
Loss = 2.1138e-02, PNorm = 124.7321, GNorm = 0.7610, lr_0 = 6.2876e-04
Loss = 2.2505e-02, PNorm = 124.7770, GNorm = 0.2885, lr_0 = 6.2833e-04
Loss = 2.1332e-02, PNorm = 124.8233, GNorm = 0.2470, lr_0 = 6.2789e-04
Loss = 2.3834e-02, PNorm = 124.8696, GNorm = 0.5364, lr_0 = 6.2746e-04
Loss = 2.0898e-02, PNorm = 124.9197, GNorm = 0.7050, lr_0 = 6.2703e-04
Loss = 2.8472e-02, PNorm = 124.9654, GNorm = 0.4918, lr_0 = 6.2661e-04
Loss = 2.0311e-02, PNorm = 125.0124, GNorm = 0.3891, lr_0 = 6.2618e-04
Loss = 2.1483e-02, PNorm = 125.0613, GNorm = 0.2812, lr_0 = 6.2575e-04
Loss = 2.5274e-02, PNorm = 125.1116, GNorm = 0.4118, lr_0 = 6.2532e-04
Loss = 2.5185e-02, PNorm = 125.1640, GNorm = 0.2949, lr_0 = 6.2489e-04
Loss = 2.1152e-02, PNorm = 125.2151, GNorm = 0.2485, lr_0 = 6.2446e-04
Loss = 2.0920e-02, PNorm = 125.2653, GNorm = 0.4514, lr_0 = 6.2403e-04
Loss = 2.2336e-02, PNorm = 125.3109, GNorm = 0.6421, lr_0 = 6.2361e-04
Loss = 2.2253e-02, PNorm = 125.3620, GNorm = 0.2458, lr_0 = 6.2318e-04
Loss = 2.5758e-02, PNorm = 125.4103, GNorm = 0.4781, lr_0 = 6.2275e-04
Loss = 2.3083e-02, PNorm = 125.4619, GNorm = 0.3080, lr_0 = 6.2233e-04
Loss = 2.6077e-02, PNorm = 125.5139, GNorm = 0.7790, lr_0 = 6.2190e-04
Loss = 2.3660e-02, PNorm = 125.5679, GNorm = 0.3308, lr_0 = 6.2147e-04
Loss = 2.6039e-02, PNorm = 125.6185, GNorm = 0.3029, lr_0 = 6.2105e-04
Loss = 2.3532e-02, PNorm = 125.6738, GNorm = 0.3876, lr_0 = 6.2062e-04
Loss = 2.3502e-02, PNorm = 125.7265, GNorm = 0.3359, lr_0 = 6.2020e-04
Loss = 2.0326e-02, PNorm = 125.7762, GNorm = 0.3213, lr_0 = 6.1977e-04
Loss = 2.6534e-02, PNorm = 125.8151, GNorm = 0.3274, lr_0 = 6.1935e-04
Loss = 1.9578e-02, PNorm = 125.8557, GNorm = 0.3151, lr_0 = 6.1892e-04
Loss = 2.2717e-02, PNorm = 125.8981, GNorm = 0.2936, lr_0 = 6.1850e-04
Loss = 2.4388e-02, PNorm = 125.9428, GNorm = 0.4204, lr_0 = 6.1808e-04
Loss = 2.4610e-02, PNorm = 125.9903, GNorm = 0.5279, lr_0 = 6.1765e-04
Loss = 2.0653e-02, PNorm = 126.0392, GNorm = 0.4079, lr_0 = 6.1723e-04
Loss = 2.6119e-02, PNorm = 126.0850, GNorm = 0.2707, lr_0 = 6.1681e-04
Loss = 2.4381e-02, PNorm = 126.1327, GNorm = 0.2100, lr_0 = 6.1638e-04
Loss = 2.1968e-02, PNorm = 126.1871, GNorm = 0.1555, lr_0 = 6.1596e-04
Loss = 2.4799e-02, PNorm = 126.2388, GNorm = 0.4682, lr_0 = 6.1554e-04
Loss = 1.9735e-02, PNorm = 126.2898, GNorm = 0.4321, lr_0 = 6.1512e-04
Loss = 2.2799e-02, PNorm = 126.3377, GNorm = 0.2394, lr_0 = 6.1470e-04
Loss = 2.3291e-02, PNorm = 126.3813, GNorm = 0.2723, lr_0 = 6.1428e-04
Loss = 2.3106e-02, PNorm = 126.4295, GNorm = 0.4606, lr_0 = 6.1385e-04
Loss = 2.1748e-02, PNorm = 126.4785, GNorm = 0.4490, lr_0 = 6.1343e-04
Loss = 2.3234e-02, PNorm = 126.5233, GNorm = 0.1455, lr_0 = 6.1301e-04
Loss = 2.6026e-02, PNorm = 126.5719, GNorm = 0.2916, lr_0 = 6.1259e-04
Loss = 2.3240e-02, PNorm = 126.6240, GNorm = 0.1567, lr_0 = 6.1217e-04
Loss = 2.4638e-02, PNorm = 126.6722, GNorm = 0.6805, lr_0 = 6.1175e-04
Loss = 2.1124e-02, PNorm = 126.7207, GNorm = 0.2424, lr_0 = 6.1134e-04
Loss = 2.3112e-02, PNorm = 126.7669, GNorm = 0.2122, lr_0 = 6.1092e-04
Loss = 2.3821e-02, PNorm = 126.8155, GNorm = 0.4236, lr_0 = 6.1050e-04
Validation mae = 0.486971
Epoch 8
Loss = 2.0401e-02, PNorm = 126.8607, GNorm = 0.3315, lr_0 = 6.1008e-04
Loss = 1.7878e-02, PNorm = 126.8987, GNorm = 0.1278, lr_0 = 6.0966e-04
Loss = 2.4618e-02, PNorm = 126.9385, GNorm = 0.6837, lr_0 = 6.0924e-04
Loss = 1.9596e-02, PNorm = 126.9729, GNorm = 0.7581, lr_0 = 6.0883e-04
Loss = 2.2975e-02, PNorm = 127.0074, GNorm = 0.2605, lr_0 = 6.0841e-04
Loss = 1.9572e-02, PNorm = 127.0494, GNorm = 0.6137, lr_0 = 6.0799e-04
Loss = 2.0018e-02, PNorm = 127.0916, GNorm = 0.2510, lr_0 = 6.0758e-04
Loss = 1.6820e-02, PNorm = 127.1268, GNorm = 0.1444, lr_0 = 6.0716e-04
Loss = 1.7975e-02, PNorm = 127.1592, GNorm = 0.2321, lr_0 = 6.0674e-04
Loss = 1.8880e-02, PNorm = 127.1891, GNorm = 0.2379, lr_0 = 6.0633e-04
Loss = 1.8496e-02, PNorm = 127.2204, GNorm = 0.4596, lr_0 = 6.0591e-04
Loss = 1.9313e-02, PNorm = 127.2584, GNorm = 0.1836, lr_0 = 6.0550e-04
Loss = 1.8244e-02, PNorm = 127.2956, GNorm = 0.1364, lr_0 = 6.0508e-04
Loss = 2.0075e-02, PNorm = 127.3264, GNorm = 0.5101, lr_0 = 6.0467e-04
Loss = 1.7986e-02, PNorm = 127.3583, GNorm = 0.3732, lr_0 = 6.0425e-04
Loss = 2.0918e-02, PNorm = 127.3942, GNorm = 0.4915, lr_0 = 6.0384e-04
Loss = 1.6808e-02, PNorm = 127.4250, GNorm = 0.5389, lr_0 = 6.0343e-04
Loss = 1.7535e-02, PNorm = 127.4562, GNorm = 0.2028, lr_0 = 6.0301e-04
Loss = 1.8243e-02, PNorm = 127.4895, GNorm = 0.7352, lr_0 = 6.0260e-04
Loss = 1.6248e-02, PNorm = 127.5277, GNorm = 0.1899, lr_0 = 6.0219e-04
Loss = 1.7514e-02, PNorm = 127.5601, GNorm = 0.2186, lr_0 = 6.0178e-04
Loss = 1.7203e-02, PNorm = 127.5892, GNorm = 0.1739, lr_0 = 6.0136e-04
Loss = 1.5712e-02, PNorm = 127.6156, GNorm = 0.3748, lr_0 = 6.0095e-04
Loss = 1.7336e-02, PNorm = 127.6447, GNorm = 0.3256, lr_0 = 6.0054e-04
Loss = 1.5802e-02, PNorm = 127.6793, GNorm = 0.2553, lr_0 = 6.0013e-04
Loss = 2.2950e-02, PNorm = 127.7133, GNorm = 0.3984, lr_0 = 5.9972e-04
Loss = 1.8682e-02, PNorm = 127.7567, GNorm = 0.2457, lr_0 = 5.9931e-04
Loss = 1.8177e-02, PNorm = 127.7979, GNorm = 0.2796, lr_0 = 5.9890e-04
Loss = 2.0154e-02, PNorm = 127.8365, GNorm = 0.5597, lr_0 = 5.9849e-04
Loss = 2.0771e-02, PNorm = 127.8751, GNorm = 0.5632, lr_0 = 5.9808e-04
Loss = 1.7157e-02, PNorm = 127.9035, GNorm = 0.4000, lr_0 = 5.9767e-04
Loss = 1.7788e-02, PNorm = 127.9450, GNorm = 0.2459, lr_0 = 5.9726e-04
Loss = 1.8398e-02, PNorm = 127.9772, GNorm = 0.5065, lr_0 = 5.9685e-04
Loss = 1.6712e-02, PNorm = 128.0111, GNorm = 0.2837, lr_0 = 5.9644e-04
Loss = 1.7367e-02, PNorm = 128.0446, GNorm = 0.2119, lr_0 = 5.9603e-04
Loss = 1.8765e-02, PNorm = 128.0810, GNorm = 0.4115, lr_0 = 5.9562e-04
Loss = 1.6233e-02, PNorm = 128.1170, GNorm = 0.4078, lr_0 = 5.9521e-04
Loss = 1.7848e-02, PNorm = 128.1503, GNorm = 0.3603, lr_0 = 5.9481e-04
Loss = 1.5627e-02, PNorm = 128.1881, GNorm = 0.2136, lr_0 = 5.9440e-04
Loss = 1.7283e-02, PNorm = 128.2181, GNorm = 0.2153, lr_0 = 5.9399e-04
Loss = 1.6425e-02, PNorm = 128.2529, GNorm = 0.6790, lr_0 = 5.9358e-04
Loss = 2.0914e-02, PNorm = 128.2906, GNorm = 0.3832, lr_0 = 5.9318e-04
Loss = 1.9806e-02, PNorm = 128.3324, GNorm = 0.6098, lr_0 = 5.9277e-04
Loss = 2.4350e-02, PNorm = 128.3649, GNorm = 0.3363, lr_0 = 5.9236e-04
Loss = 1.7528e-02, PNorm = 128.4052, GNorm = 0.2322, lr_0 = 5.9196e-04
Loss = 1.8876e-02, PNorm = 128.4481, GNorm = 0.1841, lr_0 = 5.9155e-04
Loss = 1.8156e-02, PNorm = 128.4914, GNorm = 0.1519, lr_0 = 5.9115e-04
Loss = 1.6455e-02, PNorm = 128.5277, GNorm = 0.6483, lr_0 = 5.9074e-04
Loss = 1.9171e-02, PNorm = 128.5660, GNorm = 0.2131, lr_0 = 5.9034e-04
Loss = 1.9821e-02, PNorm = 128.6131, GNorm = 0.3699, lr_0 = 5.8993e-04
Loss = 2.2973e-02, PNorm = 128.6604, GNorm = 0.1397, lr_0 = 5.8953e-04
Loss = 1.7314e-02, PNorm = 128.7038, GNorm = 0.3468, lr_0 = 5.8913e-04
Loss = 1.9671e-02, PNorm = 128.7435, GNorm = 0.3044, lr_0 = 5.8872e-04
Loss = 1.8536e-02, PNorm = 128.7867, GNorm = 0.2797, lr_0 = 5.8832e-04
Loss = 1.7021e-02, PNorm = 128.8297, GNorm = 0.2548, lr_0 = 5.8792e-04
Loss = 1.7358e-02, PNorm = 128.8655, GNorm = 0.1241, lr_0 = 5.8751e-04
Loss = 1.8879e-02, PNorm = 128.9011, GNorm = 0.3428, lr_0 = 5.8711e-04
Loss = 1.8924e-02, PNorm = 128.9391, GNorm = 0.4538, lr_0 = 5.8671e-04
Loss = 1.8038e-02, PNorm = 128.9738, GNorm = 0.3478, lr_0 = 5.8631e-04
Loss = 1.7092e-02, PNorm = 129.0093, GNorm = 0.3459, lr_0 = 5.8591e-04
Loss = 1.8476e-02, PNorm = 129.0464, GNorm = 0.3141, lr_0 = 5.8550e-04
Loss = 2.2451e-02, PNorm = 129.0882, GNorm = 0.2736, lr_0 = 5.8510e-04
Loss = 1.9478e-02, PNorm = 129.1278, GNorm = 0.1904, lr_0 = 5.8470e-04
Loss = 1.6760e-02, PNorm = 129.1714, GNorm = 0.2959, lr_0 = 5.8430e-04
Loss = 2.0015e-02, PNorm = 129.2113, GNorm = 0.4066, lr_0 = 5.8390e-04
Loss = 1.4299e-02, PNorm = 129.2486, GNorm = 0.1796, lr_0 = 5.8350e-04
Loss = 2.0978e-02, PNorm = 129.2844, GNorm = 0.3595, lr_0 = 5.8310e-04
Loss = 1.6120e-02, PNorm = 129.3247, GNorm = 0.4417, lr_0 = 5.8270e-04
Loss = 1.4916e-02, PNorm = 129.3696, GNorm = 0.2018, lr_0 = 5.8230e-04
Loss = 1.9346e-02, PNorm = 129.4104, GNorm = 0.4616, lr_0 = 5.8190e-04
Loss = 1.7669e-02, PNorm = 129.4535, GNorm = 0.2419, lr_0 = 5.8151e-04
Loss = 2.0416e-02, PNorm = 129.4952, GNorm = 0.5625, lr_0 = 5.8111e-04
Loss = 1.6998e-02, PNorm = 129.5355, GNorm = 0.2714, lr_0 = 5.8071e-04
Loss = 1.7030e-02, PNorm = 129.5722, GNorm = 0.2207, lr_0 = 5.8031e-04
Loss = 1.8099e-02, PNorm = 129.6094, GNorm = 0.3479, lr_0 = 5.7991e-04
Loss = 1.8858e-02, PNorm = 129.6498, GNorm = 0.3968, lr_0 = 5.7952e-04
Loss = 1.6600e-02, PNorm = 129.6924, GNorm = 0.2887, lr_0 = 5.7912e-04
Loss = 1.9386e-02, PNorm = 129.7347, GNorm = 0.3071, lr_0 = 5.7872e-04
Loss = 1.8471e-02, PNorm = 129.7767, GNorm = 0.3572, lr_0 = 5.7833e-04
Loss = 2.0450e-02, PNorm = 129.8147, GNorm = 0.1578, lr_0 = 5.7793e-04
Loss = 1.9721e-02, PNorm = 129.8526, GNorm = 0.2091, lr_0 = 5.7753e-04
Loss = 2.0211e-02, PNorm = 129.8881, GNorm = 0.5262, lr_0 = 5.7714e-04
Loss = 1.8148e-02, PNorm = 129.9331, GNorm = 0.7499, lr_0 = 5.7674e-04
Loss = 1.9904e-02, PNorm = 129.9761, GNorm = 0.6512, lr_0 = 5.7635e-04
Loss = 1.7112e-02, PNorm = 130.0188, GNorm = 0.6890, lr_0 = 5.7595e-04
Loss = 1.8273e-02, PNorm = 130.0588, GNorm = 0.2727, lr_0 = 5.7556e-04
Loss = 1.8777e-02, PNorm = 130.1041, GNorm = 0.6840, lr_0 = 5.7516e-04
Loss = 1.7799e-02, PNorm = 130.1477, GNorm = 0.4877, lr_0 = 5.7477e-04
Loss = 1.6255e-02, PNorm = 130.1853, GNorm = 0.6231, lr_0 = 5.7438e-04
Loss = 1.7164e-02, PNorm = 130.2276, GNorm = 0.4058, lr_0 = 5.7398e-04
Loss = 1.8119e-02, PNorm = 130.2691, GNorm = 0.1736, lr_0 = 5.7359e-04
Loss = 1.5980e-02, PNorm = 130.3071, GNorm = 0.4180, lr_0 = 5.7320e-04
Loss = 1.6587e-02, PNorm = 130.3438, GNorm = 0.2475, lr_0 = 5.7280e-04
Loss = 1.9401e-02, PNorm = 130.3779, GNorm = 0.3454, lr_0 = 5.7241e-04
Loss = 2.0289e-02, PNorm = 130.4222, GNorm = 0.3662, lr_0 = 5.7202e-04
Loss = 1.6213e-02, PNorm = 130.4665, GNorm = 0.4553, lr_0 = 5.7163e-04
Loss = 1.7770e-02, PNorm = 130.5124, GNorm = 0.1608, lr_0 = 5.7124e-04
Loss = 1.7156e-02, PNorm = 130.5544, GNorm = 0.4804, lr_0 = 5.7084e-04
Loss = 2.2546e-02, PNorm = 130.5951, GNorm = 0.4496, lr_0 = 5.7045e-04
Loss = 2.1403e-02, PNorm = 130.6466, GNorm = 0.7766, lr_0 = 5.7006e-04
Loss = 1.8795e-02, PNorm = 130.6966, GNorm = 0.3637, lr_0 = 5.6967e-04
Loss = 1.8221e-02, PNorm = 130.7414, GNorm = 0.4310, lr_0 = 5.6928e-04
Loss = 1.8425e-02, PNorm = 130.7861, GNorm = 0.2130, lr_0 = 5.6889e-04
Loss = 1.8007e-02, PNorm = 130.8306, GNorm = 0.2960, lr_0 = 5.6850e-04
Loss = 2.1292e-02, PNorm = 130.8782, GNorm = 0.3401, lr_0 = 5.6811e-04
Loss = 1.7467e-02, PNorm = 130.9220, GNorm = 0.2656, lr_0 = 5.6772e-04
Loss = 2.0788e-02, PNorm = 130.9608, GNorm = 0.2462, lr_0 = 5.6733e-04
Loss = 2.1049e-02, PNorm = 131.0001, GNorm = 0.4948, lr_0 = 5.6695e-04
Loss = 1.6860e-02, PNorm = 131.0445, GNorm = 0.2630, lr_0 = 5.6656e-04
Loss = 1.8192e-02, PNorm = 131.0882, GNorm = 0.3062, lr_0 = 5.6617e-04
Loss = 1.9480e-02, PNorm = 131.1317, GNorm = 0.2059, lr_0 = 5.6578e-04
Loss = 1.7843e-02, PNorm = 131.1726, GNorm = 0.1368, lr_0 = 5.6539e-04
Loss = 1.7542e-02, PNorm = 131.2089, GNorm = 0.4314, lr_0 = 5.6501e-04
Loss = 1.7260e-02, PNorm = 131.2494, GNorm = 0.3136, lr_0 = 5.6462e-04
Loss = 1.8749e-02, PNorm = 131.2907, GNorm = 0.3645, lr_0 = 5.6423e-04
Loss = 1.8872e-02, PNorm = 131.3319, GNorm = 0.4176, lr_0 = 5.6385e-04
Loss = 1.9541e-02, PNorm = 131.3738, GNorm = 0.4668, lr_0 = 5.6346e-04
Loss = 1.9553e-02, PNorm = 131.4160, GNorm = 0.3116, lr_0 = 5.6307e-04
Loss = 2.0360e-02, PNorm = 131.4551, GNorm = 0.3672, lr_0 = 5.6269e-04
Loss = 1.9710e-02, PNorm = 131.4994, GNorm = 0.1467, lr_0 = 5.6230e-04
Validation mae = 0.483269
Epoch 9
Loss = 1.6662e-02, PNorm = 131.5361, GNorm = 0.2244, lr_0 = 5.6192e-04
Loss = 1.4473e-02, PNorm = 131.5663, GNorm = 0.1629, lr_0 = 5.6153e-04
Loss = 1.4664e-02, PNorm = 131.5950, GNorm = 0.1277, lr_0 = 5.6115e-04
Loss = 1.6009e-02, PNorm = 131.6230, GNorm = 0.3759, lr_0 = 5.6076e-04
Loss = 1.5973e-02, PNorm = 131.6519, GNorm = 0.2876, lr_0 = 5.6038e-04
Loss = 1.7906e-02, PNorm = 131.6825, GNorm = 0.3107, lr_0 = 5.6000e-04
Loss = 1.5364e-02, PNorm = 131.7113, GNorm = 0.3369, lr_0 = 5.5961e-04
Loss = 1.6520e-02, PNorm = 131.7374, GNorm = 0.1724, lr_0 = 5.5923e-04
Loss = 1.6546e-02, PNorm = 131.7686, GNorm = 0.2158, lr_0 = 5.5885e-04
Loss = 1.5518e-02, PNorm = 131.7967, GNorm = 0.3656, lr_0 = 5.5846e-04
Loss = 1.5077e-02, PNorm = 131.8228, GNorm = 0.2730, lr_0 = 5.5808e-04
Loss = 1.6973e-02, PNorm = 131.8541, GNorm = 0.4773, lr_0 = 5.5770e-04
Loss = 1.3907e-02, PNorm = 131.8762, GNorm = 0.4311, lr_0 = 5.5732e-04
Loss = 1.4036e-02, PNorm = 131.9016, GNorm = 0.7134, lr_0 = 5.5693e-04
Loss = 1.4705e-02, PNorm = 131.9254, GNorm = 0.4344, lr_0 = 5.5655e-04
Loss = 1.6529e-02, PNorm = 131.9485, GNorm = 0.1753, lr_0 = 5.5617e-04
Loss = 1.4667e-02, PNorm = 131.9745, GNorm = 0.2945, lr_0 = 5.5579e-04
Loss = 1.4063e-02, PNorm = 132.0045, GNorm = 0.2726, lr_0 = 5.5541e-04
Loss = 1.3330e-02, PNorm = 132.0347, GNorm = 0.2028, lr_0 = 5.5503e-04
Loss = 1.4093e-02, PNorm = 132.0618, GNorm = 0.2968, lr_0 = 5.5465e-04
Loss = 1.2827e-02, PNorm = 132.0910, GNorm = 0.2989, lr_0 = 5.5427e-04
Loss = 1.4030e-02, PNorm = 132.1239, GNorm = 0.3376, lr_0 = 5.5389e-04
Loss = 1.5709e-02, PNorm = 132.1544, GNorm = 0.2771, lr_0 = 5.5351e-04
Loss = 1.4512e-02, PNorm = 132.1830, GNorm = 0.2216, lr_0 = 5.5313e-04
Loss = 1.3528e-02, PNorm = 132.2144, GNorm = 0.3407, lr_0 = 5.5275e-04
Loss = 1.5490e-02, PNorm = 132.2457, GNorm = 0.3498, lr_0 = 5.5237e-04
Loss = 1.5637e-02, PNorm = 132.2737, GNorm = 0.3264, lr_0 = 5.5199e-04
Loss = 1.4445e-02, PNorm = 132.3023, GNorm = 0.2327, lr_0 = 5.5162e-04
Loss = 1.5054e-02, PNorm = 132.3366, GNorm = 0.1229, lr_0 = 5.5124e-04
Loss = 1.4887e-02, PNorm = 132.3714, GNorm = 0.1253, lr_0 = 5.5086e-04
Loss = 1.3520e-02, PNorm = 132.4048, GNorm = 0.2969, lr_0 = 5.5048e-04
Loss = 1.5917e-02, PNorm = 132.4298, GNorm = 0.1658, lr_0 = 5.5011e-04
Loss = 1.3237e-02, PNorm = 132.4618, GNorm = 0.6105, lr_0 = 5.4973e-04
Loss = 1.4504e-02, PNorm = 132.4900, GNorm = 0.1464, lr_0 = 5.4935e-04
Loss = 1.2292e-02, PNorm = 132.5153, GNorm = 0.3648, lr_0 = 5.4898e-04
Loss = 1.8304e-02, PNorm = 132.5439, GNorm = 0.1716, lr_0 = 5.4860e-04
Loss = 1.5856e-02, PNorm = 132.5749, GNorm = 0.2129, lr_0 = 5.4822e-04
Loss = 1.4384e-02, PNorm = 132.6055, GNorm = 0.3964, lr_0 = 5.4785e-04
Loss = 1.4927e-02, PNorm = 132.6370, GNorm = 0.1546, lr_0 = 5.4747e-04
Loss = 1.4044e-02, PNorm = 132.6730, GNorm = 0.3333, lr_0 = 5.4710e-04
Loss = 1.5626e-02, PNorm = 132.7056, GNorm = 0.5056, lr_0 = 5.4672e-04
Loss = 1.2555e-02, PNorm = 132.7417, GNorm = 0.6049, lr_0 = 5.4635e-04
Loss = 1.6226e-02, PNorm = 132.7698, GNorm = 0.5439, lr_0 = 5.4597e-04
Loss = 1.5749e-02, PNorm = 132.8070, GNorm = 0.6798, lr_0 = 5.4560e-04
Loss = 1.5770e-02, PNorm = 132.8479, GNorm = 0.2166, lr_0 = 5.4523e-04
Loss = 1.3730e-02, PNorm = 132.8839, GNorm = 0.3364, lr_0 = 5.4485e-04
Loss = 1.5529e-02, PNorm = 132.9122, GNorm = 0.3133, lr_0 = 5.4448e-04
Loss = 1.8018e-02, PNorm = 132.9413, GNorm = 0.1376, lr_0 = 5.4411e-04
Loss = 1.4084e-02, PNorm = 132.9668, GNorm = 0.5536, lr_0 = 5.4373e-04
Loss = 1.4036e-02, PNorm = 132.9983, GNorm = 0.3690, lr_0 = 5.4336e-04
Loss = 1.3093e-02, PNorm = 133.0292, GNorm = 0.3528, lr_0 = 5.4299e-04
Loss = 1.3283e-02, PNorm = 133.0625, GNorm = 0.4277, lr_0 = 5.4262e-04
Loss = 1.7528e-02, PNorm = 133.0909, GNorm = 0.3041, lr_0 = 5.4225e-04
Loss = 1.6660e-02, PNorm = 133.1235, GNorm = 0.2631, lr_0 = 5.4187e-04
Loss = 1.4732e-02, PNorm = 133.1572, GNorm = 0.2097, lr_0 = 5.4150e-04
Loss = 1.5577e-02, PNorm = 133.1947, GNorm = 0.2390, lr_0 = 5.4113e-04
Loss = 1.4488e-02, PNorm = 133.2306, GNorm = 0.2467, lr_0 = 5.4076e-04
Loss = 1.2709e-02, PNorm = 133.2708, GNorm = 0.1452, lr_0 = 5.4039e-04
Loss = 1.5220e-02, PNorm = 133.3019, GNorm = 0.1423, lr_0 = 5.4002e-04
Loss = 2.1940e-02, PNorm = 133.3385, GNorm = 0.2398, lr_0 = 5.3965e-04
Loss = 1.3880e-02, PNorm = 133.3733, GNorm = 0.5044, lr_0 = 5.3928e-04
Loss = 1.4120e-02, PNorm = 133.4086, GNorm = 0.3675, lr_0 = 5.3891e-04
Loss = 1.3559e-02, PNorm = 133.4457, GNorm = 0.6043, lr_0 = 5.3854e-04
Loss = 1.7968e-02, PNorm = 133.4785, GNorm = 0.3538, lr_0 = 5.3817e-04
Loss = 1.5495e-02, PNorm = 133.5175, GNorm = 0.2694, lr_0 = 5.3781e-04
Loss = 1.2385e-02, PNorm = 133.5571, GNorm = 0.2197, lr_0 = 5.3744e-04
Loss = 1.2652e-02, PNorm = 133.5879, GNorm = 0.4801, lr_0 = 5.3707e-04
Loss = 1.5512e-02, PNorm = 133.6179, GNorm = 0.3526, lr_0 = 5.3670e-04
Loss = 1.3737e-02, PNorm = 133.6530, GNorm = 0.2709, lr_0 = 5.3633e-04
Loss = 1.2468e-02, PNorm = 133.6859, GNorm = 0.2423, lr_0 = 5.3597e-04
Loss = 1.3683e-02, PNorm = 133.7127, GNorm = 0.4520, lr_0 = 5.3560e-04
Loss = 2.1234e-02, PNorm = 133.7394, GNorm = 0.2823, lr_0 = 5.3523e-04
Loss = 1.5489e-02, PNorm = 133.7711, GNorm = 0.2787, lr_0 = 5.3486e-04
Loss = 2.3107e-02, PNorm = 133.8135, GNorm = 0.6590, lr_0 = 5.3450e-04
Loss = 1.5447e-02, PNorm = 133.8525, GNorm = 0.3798, lr_0 = 5.3413e-04
Loss = 1.4216e-02, PNorm = 133.8886, GNorm = 0.1267, lr_0 = 5.3377e-04
Loss = 1.4301e-02, PNorm = 133.9229, GNorm = 0.6585, lr_0 = 5.3340e-04
Loss = 1.9257e-02, PNorm = 133.9561, GNorm = 0.5164, lr_0 = 5.3304e-04
Loss = 1.6240e-02, PNorm = 133.9916, GNorm = 0.2646, lr_0 = 5.3267e-04
Loss = 1.3560e-02, PNorm = 134.0286, GNorm = 0.3431, lr_0 = 5.3231e-04
Loss = 1.7818e-02, PNorm = 134.0599, GNorm = 0.2375, lr_0 = 5.3194e-04
Loss = 1.3248e-02, PNorm = 134.0917, GNorm = 0.4402, lr_0 = 5.3158e-04
Loss = 1.6280e-02, PNorm = 134.1259, GNorm = 0.2317, lr_0 = 5.3121e-04
Loss = 1.7199e-02, PNorm = 134.1627, GNorm = 0.4748, lr_0 = 5.3085e-04
Loss = 1.2736e-02, PNorm = 134.2025, GNorm = 0.2491, lr_0 = 5.3048e-04
Loss = 1.7950e-02, PNorm = 134.2336, GNorm = 0.9184, lr_0 = 5.3012e-04
Loss = 1.8785e-02, PNorm = 134.2624, GNorm = 0.2969, lr_0 = 5.2976e-04
Loss = 1.4568e-02, PNorm = 134.2949, GNorm = 0.1711, lr_0 = 5.2939e-04
Loss = 1.4211e-02, PNorm = 134.3302, GNorm = 0.3524, lr_0 = 5.2903e-04
Loss = 1.3602e-02, PNorm = 134.3695, GNorm = 0.4202, lr_0 = 5.2867e-04
Loss = 1.6079e-02, PNorm = 134.4058, GNorm = 0.2284, lr_0 = 5.2831e-04
Loss = 1.4323e-02, PNorm = 134.4446, GNorm = 0.2320, lr_0 = 5.2795e-04
Loss = 1.8375e-02, PNorm = 134.4789, GNorm = 0.2563, lr_0 = 5.2758e-04
Loss = 1.4937e-02, PNorm = 134.5111, GNorm = 0.4595, lr_0 = 5.2722e-04
Loss = 1.2880e-02, PNorm = 134.5424, GNorm = 0.2613, lr_0 = 5.2686e-04
Loss = 1.5585e-02, PNorm = 134.5726, GNorm = 0.4230, lr_0 = 5.2650e-04
Loss = 1.5880e-02, PNorm = 134.6120, GNorm = 0.1888, lr_0 = 5.2614e-04
Loss = 1.6202e-02, PNorm = 134.6466, GNorm = 0.4564, lr_0 = 5.2578e-04
Loss = 1.3846e-02, PNorm = 134.6799, GNorm = 0.3870, lr_0 = 5.2542e-04
Loss = 1.8545e-02, PNorm = 134.7181, GNorm = 0.1460, lr_0 = 5.2506e-04
Loss = 1.5392e-02, PNorm = 134.7526, GNorm = 0.4242, lr_0 = 5.2470e-04
Loss = 1.7040e-02, PNorm = 134.7905, GNorm = 0.3895, lr_0 = 5.2434e-04
Loss = 1.6808e-02, PNorm = 134.8304, GNorm = 0.5791, lr_0 = 5.2398e-04
Loss = 2.0786e-02, PNorm = 134.8692, GNorm = 0.2583, lr_0 = 5.2362e-04
Loss = 1.6210e-02, PNorm = 134.9050, GNorm = 0.2511, lr_0 = 5.2326e-04
Loss = 1.5954e-02, PNorm = 134.9411, GNorm = 0.4005, lr_0 = 5.2290e-04
Loss = 1.6061e-02, PNorm = 134.9757, GNorm = 0.5314, lr_0 = 5.2255e-04
Loss = 1.4641e-02, PNorm = 135.0095, GNorm = 0.2083, lr_0 = 5.2219e-04
Loss = 1.3754e-02, PNorm = 135.0447, GNorm = 0.7267, lr_0 = 5.2183e-04
Loss = 1.4982e-02, PNorm = 135.0768, GNorm = 0.4965, lr_0 = 5.2147e-04
Loss = 1.5220e-02, PNorm = 135.1115, GNorm = 0.1688, lr_0 = 5.2112e-04
Loss = 1.4891e-02, PNorm = 135.1457, GNorm = 0.8766, lr_0 = 5.2076e-04
Loss = 1.8112e-02, PNorm = 135.1800, GNorm = 0.1955, lr_0 = 5.2040e-04
Loss = 1.6814e-02, PNorm = 135.2170, GNorm = 0.4898, lr_0 = 5.2005e-04
Loss = 1.6077e-02, PNorm = 135.2533, GNorm = 0.1244, lr_0 = 5.1969e-04
Loss = 1.3213e-02, PNorm = 135.2883, GNorm = 0.3168, lr_0 = 5.1933e-04
Loss = 1.4779e-02, PNorm = 135.3209, GNorm = 0.3332, lr_0 = 5.1898e-04
Loss = 1.6287e-02, PNorm = 135.3544, GNorm = 0.2916, lr_0 = 5.1862e-04
Loss = 1.5004e-02, PNorm = 135.3854, GNorm = 0.5080, lr_0 = 5.1827e-04
Loss = 1.5589e-02, PNorm = 135.4274, GNorm = 0.3354, lr_0 = 5.1791e-04
Validation mae = 0.484656
Epoch 10
Loss = 1.3561e-02, PNorm = 135.4617, GNorm = 0.1500, lr_0 = 5.1756e-04
Loss = 1.3791e-02, PNorm = 135.4887, GNorm = 0.1895, lr_0 = 5.1720e-04
Loss = 1.4310e-02, PNorm = 135.5091, GNorm = 0.2199, lr_0 = 5.1685e-04
Loss = 1.2933e-02, PNorm = 135.5299, GNorm = 0.2879, lr_0 = 5.1649e-04
Loss = 1.3540e-02, PNorm = 135.5550, GNorm = 0.4906, lr_0 = 5.1614e-04
Loss = 1.1117e-02, PNorm = 135.5812, GNorm = 0.3665, lr_0 = 5.1579e-04
Loss = 1.3194e-02, PNorm = 135.5968, GNorm = 0.4160, lr_0 = 5.1543e-04
Loss = 1.1740e-02, PNorm = 135.6165, GNorm = 0.3082, lr_0 = 5.1508e-04
Loss = 1.5650e-02, PNorm = 135.6444, GNorm = 0.2221, lr_0 = 5.1473e-04
Loss = 1.2044e-02, PNorm = 135.6711, GNorm = 0.3570, lr_0 = 5.1437e-04
Loss = 1.4261e-02, PNorm = 135.6986, GNorm = 1.0493, lr_0 = 5.1402e-04
Loss = 1.1883e-02, PNorm = 135.7223, GNorm = 0.2220, lr_0 = 5.1367e-04
Loss = 1.0839e-02, PNorm = 135.7501, GNorm = 0.2573, lr_0 = 5.1332e-04
Loss = 1.2132e-02, PNorm = 135.7739, GNorm = 0.3441, lr_0 = 5.1297e-04
Loss = 1.3447e-02, PNorm = 135.7974, GNorm = 0.3181, lr_0 = 5.1262e-04
Loss = 1.2603e-02, PNorm = 135.8179, GNorm = 0.2540, lr_0 = 5.1226e-04
Loss = 1.2553e-02, PNorm = 135.8430, GNorm = 0.2147, lr_0 = 5.1191e-04
Loss = 1.1652e-02, PNorm = 135.8675, GNorm = 0.3895, lr_0 = 5.1156e-04
Loss = 1.1424e-02, PNorm = 135.8901, GNorm = 0.1295, lr_0 = 5.1121e-04
Loss = 1.0089e-02, PNorm = 135.9140, GNorm = 0.4252, lr_0 = 5.1086e-04
Loss = 1.3453e-02, PNorm = 135.9372, GNorm = 0.3281, lr_0 = 5.1051e-04
Loss = 1.1767e-02, PNorm = 135.9610, GNorm = 0.1729, lr_0 = 5.1016e-04
Loss = 1.1861e-02, PNorm = 135.9871, GNorm = 0.4251, lr_0 = 5.0981e-04
Loss = 1.0565e-02, PNorm = 136.0097, GNorm = 0.2338, lr_0 = 5.0946e-04
Loss = 1.0754e-02, PNorm = 136.0352, GNorm = 0.1710, lr_0 = 5.0911e-04
Loss = 1.2768e-02, PNorm = 136.0599, GNorm = 0.5216, lr_0 = 5.0877e-04
Loss = 1.1874e-02, PNorm = 136.0826, GNorm = 0.1202, lr_0 = 5.0842e-04
Loss = 1.3092e-02, PNorm = 136.1041, GNorm = 0.6224, lr_0 = 5.0807e-04
Loss = 1.3633e-02, PNorm = 136.1297, GNorm = 0.2635, lr_0 = 5.0772e-04
Loss = 1.1849e-02, PNorm = 136.1594, GNorm = 0.1234, lr_0 = 5.0737e-04
Loss = 1.3164e-02, PNorm = 136.1825, GNorm = 0.4477, lr_0 = 5.0703e-04
Loss = 1.0997e-02, PNorm = 136.2081, GNorm = 0.2550, lr_0 = 5.0668e-04
Loss = 1.2786e-02, PNorm = 136.2366, GNorm = 0.5363, lr_0 = 5.0633e-04
Loss = 1.1697e-02, PNorm = 136.2579, GNorm = 0.2989, lr_0 = 5.0598e-04
Loss = 1.1679e-02, PNorm = 136.2900, GNorm = 0.2791, lr_0 = 5.0564e-04
Loss = 1.3100e-02, PNorm = 136.3140, GNorm = 0.2673, lr_0 = 5.0529e-04
Loss = 1.0724e-02, PNorm = 136.3378, GNorm = 0.3417, lr_0 = 5.0494e-04
Loss = 1.3834e-02, PNorm = 136.3566, GNorm = 0.2196, lr_0 = 5.0460e-04
Loss = 1.1959e-02, PNorm = 136.3740, GNorm = 0.5074, lr_0 = 5.0425e-04
Loss = 1.2223e-02, PNorm = 136.4028, GNorm = 0.1775, lr_0 = 5.0391e-04
Loss = 1.5584e-02, PNorm = 136.4366, GNorm = 0.4255, lr_0 = 5.0356e-04
Loss = 1.3288e-02, PNorm = 136.4630, GNorm = 0.1954, lr_0 = 5.0322e-04
Loss = 1.1396e-02, PNorm = 136.4879, GNorm = 0.1705, lr_0 = 5.0287e-04
Loss = 1.1543e-02, PNorm = 136.5105, GNorm = 0.4693, lr_0 = 5.0253e-04
Loss = 1.2710e-02, PNorm = 136.5388, GNorm = 0.3246, lr_0 = 5.0218e-04
Loss = 1.2509e-02, PNorm = 136.5636, GNorm = 0.4469, lr_0 = 5.0184e-04
Loss = 1.1963e-02, PNorm = 136.5915, GNorm = 0.2820, lr_0 = 5.0150e-04
Loss = 1.0139e-02, PNorm = 136.6146, GNorm = 0.2619, lr_0 = 5.0115e-04
Loss = 9.9527e-03, PNorm = 136.6398, GNorm = 0.2889, lr_0 = 5.0081e-04
Loss = 1.2762e-02, PNorm = 136.6607, GNorm = 0.2859, lr_0 = 5.0047e-04
Loss = 1.0427e-02, PNorm = 136.6832, GNorm = 0.2226, lr_0 = 5.0012e-04
Loss = 1.4867e-02, PNorm = 136.7102, GNorm = 0.2629, lr_0 = 4.9978e-04
Loss = 1.0860e-02, PNorm = 136.7409, GNorm = 0.2294, lr_0 = 4.9944e-04
Loss = 1.1769e-02, PNorm = 136.7703, GNorm = 0.3257, lr_0 = 4.9910e-04
Loss = 1.2179e-02, PNorm = 136.8018, GNorm = 0.2943, lr_0 = 4.9875e-04
Loss = 1.2123e-02, PNorm = 136.8265, GNorm = 0.1813, lr_0 = 4.9841e-04
Loss = 1.2957e-02, PNorm = 136.8538, GNorm = 0.5194, lr_0 = 4.9807e-04
Loss = 1.3367e-02, PNorm = 136.8770, GNorm = 0.0930, lr_0 = 4.9773e-04
Loss = 1.1008e-02, PNorm = 136.9038, GNorm = 0.1515, lr_0 = 4.9739e-04
Loss = 1.1378e-02, PNorm = 136.9350, GNorm = 0.2174, lr_0 = 4.9705e-04
Loss = 1.3323e-02, PNorm = 136.9630, GNorm = 0.5622, lr_0 = 4.9671e-04
Loss = 1.1755e-02, PNorm = 136.9930, GNorm = 0.2066, lr_0 = 4.9637e-04
Loss = 1.1710e-02, PNorm = 137.0186, GNorm = 0.4584, lr_0 = 4.9603e-04
Loss = 1.2862e-02, PNorm = 137.0422, GNorm = 0.3350, lr_0 = 4.9569e-04
Loss = 1.1586e-02, PNorm = 137.0645, GNorm = 0.1275, lr_0 = 4.9535e-04
Loss = 1.0757e-02, PNorm = 137.0914, GNorm = 0.1550, lr_0 = 4.9501e-04
Loss = 9.9881e-03, PNorm = 137.1196, GNorm = 0.1697, lr_0 = 4.9467e-04
Loss = 1.2682e-02, PNorm = 137.1491, GNorm = 0.2313, lr_0 = 4.9433e-04
Loss = 1.1870e-02, PNorm = 137.1765, GNorm = 0.2702, lr_0 = 4.9399e-04
Loss = 1.0248e-02, PNorm = 137.2032, GNorm = 0.3677, lr_0 = 4.9365e-04
Loss = 1.2549e-02, PNorm = 137.2308, GNorm = 0.3441, lr_0 = 4.9332e-04
Loss = 1.0907e-02, PNorm = 137.2593, GNorm = 0.4463, lr_0 = 4.9298e-04
Loss = 9.3601e-03, PNorm = 137.2878, GNorm = 0.4374, lr_0 = 4.9264e-04
Loss = 1.1240e-02, PNorm = 137.3106, GNorm = 0.5410, lr_0 = 4.9230e-04
Loss = 1.2590e-02, PNorm = 137.3283, GNorm = 0.1768, lr_0 = 4.9197e-04
Loss = 1.3528e-02, PNorm = 137.3571, GNorm = 0.4742, lr_0 = 4.9163e-04
Loss = 1.2048e-02, PNorm = 137.3915, GNorm = 0.1747, lr_0 = 4.9129e-04
Loss = 1.1045e-02, PNorm = 137.4246, GNorm = 0.1737, lr_0 = 4.9095e-04
Loss = 1.1525e-02, PNorm = 137.4560, GNorm = 0.3342, lr_0 = 4.9062e-04
Loss = 1.2366e-02, PNorm = 137.4833, GNorm = 0.1795, lr_0 = 4.9028e-04
Loss = 1.5191e-02, PNorm = 137.5152, GNorm = 0.3471, lr_0 = 4.8995e-04
Loss = 1.1505e-02, PNorm = 137.5474, GNorm = 0.2300, lr_0 = 4.8961e-04
Loss = 1.1497e-02, PNorm = 137.5770, GNorm = 0.4843, lr_0 = 4.8928e-04
Loss = 1.1972e-02, PNorm = 137.6066, GNorm = 0.2911, lr_0 = 4.8894e-04
Loss = 1.4004e-02, PNorm = 137.6436, GNorm = 0.5267, lr_0 = 4.8861e-04
Loss = 1.1305e-02, PNorm = 137.6732, GNorm = 0.3404, lr_0 = 4.8827e-04
Loss = 1.3653e-02, PNorm = 137.7042, GNorm = 0.1691, lr_0 = 4.8794e-04
Loss = 1.1767e-02, PNorm = 137.7340, GNorm = 0.1969, lr_0 = 4.8760e-04
Loss = 1.4894e-02, PNorm = 137.7589, GNorm = 0.6817, lr_0 = 4.8727e-04
Loss = 1.2955e-02, PNorm = 137.7874, GNorm = 0.2493, lr_0 = 4.8693e-04
Loss = 1.1779e-02, PNorm = 137.8141, GNorm = 0.1719, lr_0 = 4.8660e-04
Loss = 1.0760e-02, PNorm = 137.8412, GNorm = 0.2173, lr_0 = 4.8627e-04
Loss = 1.2953e-02, PNorm = 137.8706, GNorm = 0.2822, lr_0 = 4.8593e-04
Loss = 1.2102e-02, PNorm = 137.9024, GNorm = 0.3424, lr_0 = 4.8560e-04
Loss = 1.1351e-02, PNorm = 137.9327, GNorm = 0.1944, lr_0 = 4.8527e-04
Loss = 1.0449e-02, PNorm = 137.9600, GNorm = 0.1902, lr_0 = 4.8494e-04
Loss = 1.3918e-02, PNorm = 137.9873, GNorm = 0.2856, lr_0 = 4.8460e-04
Loss = 1.5745e-02, PNorm = 138.0165, GNorm = 0.2379, lr_0 = 4.8427e-04
Loss = 1.4283e-02, PNorm = 138.0467, GNorm = 0.7487, lr_0 = 4.8394e-04
Loss = 1.6464e-02, PNorm = 138.0770, GNorm = 0.1706, lr_0 = 4.8361e-04
Loss = 1.2179e-02, PNorm = 138.1066, GNorm = 0.3459, lr_0 = 4.8328e-04
Loss = 1.2656e-02, PNorm = 138.1357, GNorm = 0.1994, lr_0 = 4.8295e-04
Loss = 1.0856e-02, PNorm = 138.1611, GNorm = 0.2784, lr_0 = 4.8262e-04
Loss = 1.2068e-02, PNorm = 138.1904, GNorm = 0.4092, lr_0 = 4.8228e-04
Loss = 1.2108e-02, PNorm = 138.2181, GNorm = 0.4967, lr_0 = 4.8195e-04
Loss = 1.2650e-02, PNorm = 138.2462, GNorm = 0.4245, lr_0 = 4.8162e-04
Loss = 1.1684e-02, PNorm = 138.2736, GNorm = 0.2483, lr_0 = 4.8129e-04
Loss = 9.7058e-03, PNorm = 138.3004, GNorm = 0.1667, lr_0 = 4.8096e-04
Loss = 1.0431e-02, PNorm = 138.3246, GNorm = 0.4524, lr_0 = 4.8064e-04
Loss = 9.6669e-03, PNorm = 138.3460, GNorm = 0.1301, lr_0 = 4.8031e-04
Loss = 1.1222e-02, PNorm = 138.3740, GNorm = 0.1077, lr_0 = 4.7998e-04
Loss = 1.1430e-02, PNorm = 138.4043, GNorm = 0.2534, lr_0 = 4.7965e-04
Loss = 1.2479e-02, PNorm = 138.4335, GNorm = 0.3972, lr_0 = 4.7932e-04
Loss = 1.4092e-02, PNorm = 138.4662, GNorm = 0.1720, lr_0 = 4.7899e-04
Loss = 1.1089e-02, PNorm = 138.4955, GNorm = 0.3638, lr_0 = 4.7866e-04
Loss = 1.2863e-02, PNorm = 138.5246, GNorm = 0.3114, lr_0 = 4.7833e-04
Loss = 1.4456e-02, PNorm = 138.5630, GNorm = 0.3078, lr_0 = 4.7801e-04
Loss = 1.4197e-02, PNorm = 138.5985, GNorm = 0.1606, lr_0 = 4.7768e-04
Loss = 1.1789e-02, PNorm = 138.6309, GNorm = 0.1742, lr_0 = 4.7735e-04
Loss = 1.2006e-02, PNorm = 138.6542, GNorm = 0.2373, lr_0 = 4.7703e-04
Validation mae = 0.481103
Epoch 11
Loss = 1.1422e-02, PNorm = 138.6729, GNorm = 0.1278, lr_0 = 4.7670e-04
Loss = 1.2184e-02, PNorm = 138.6952, GNorm = 0.3073, lr_0 = 4.7637e-04
Loss = 9.7676e-03, PNorm = 138.7204, GNorm = 0.4263, lr_0 = 4.7605e-04
Loss = 1.2727e-02, PNorm = 138.7493, GNorm = 0.2580, lr_0 = 4.7572e-04
Loss = 1.1192e-02, PNorm = 138.7710, GNorm = 0.1708, lr_0 = 4.7539e-04
Loss = 1.0861e-02, PNorm = 138.7900, GNorm = 0.3583, lr_0 = 4.7507e-04
Loss = 9.9617e-03, PNorm = 138.8126, GNorm = 0.5918, lr_0 = 4.7474e-04
Loss = 9.9561e-03, PNorm = 138.8312, GNorm = 0.1658, lr_0 = 4.7442e-04
Loss = 1.0035e-02, PNorm = 138.8458, GNorm = 0.3953, lr_0 = 4.7409e-04
Loss = 9.1380e-03, PNorm = 138.8649, GNorm = 0.3394, lr_0 = 4.7377e-04
Loss = 1.2123e-02, PNorm = 138.8851, GNorm = 0.2862, lr_0 = 4.7344e-04
Loss = 1.1340e-02, PNorm = 138.9111, GNorm = 0.3157, lr_0 = 4.7312e-04
Loss = 8.9369e-03, PNorm = 138.9381, GNorm = 0.2230, lr_0 = 4.7279e-04
Loss = 1.0248e-02, PNorm = 138.9589, GNorm = 0.1349, lr_0 = 4.7247e-04
Loss = 1.0541e-02, PNorm = 138.9800, GNorm = 0.1414, lr_0 = 4.7215e-04
Loss = 1.0366e-02, PNorm = 138.9994, GNorm = 0.2543, lr_0 = 4.7182e-04
Loss = 1.0958e-02, PNorm = 139.0203, GNorm = 0.5873, lr_0 = 4.7150e-04
Loss = 1.1857e-02, PNorm = 139.0411, GNorm = 0.1810, lr_0 = 4.7118e-04
Loss = 9.2565e-03, PNorm = 139.0655, GNorm = 0.1165, lr_0 = 4.7085e-04
Loss = 1.1065e-02, PNorm = 139.0855, GNorm = 0.3652, lr_0 = 4.7053e-04
Loss = 1.0611e-02, PNorm = 139.1076, GNorm = 0.3855, lr_0 = 4.7021e-04
Loss = 1.0639e-02, PNorm = 139.1309, GNorm = 0.4845, lr_0 = 4.6989e-04
Loss = 1.0713e-02, PNorm = 139.1515, GNorm = 0.2267, lr_0 = 4.6957e-04
Loss = 9.7503e-03, PNorm = 139.1708, GNorm = 0.3533, lr_0 = 4.6924e-04
Loss = 8.9390e-03, PNorm = 139.1953, GNorm = 0.2612, lr_0 = 4.6892e-04
Loss = 8.9334e-03, PNorm = 139.2179, GNorm = 0.1014, lr_0 = 4.6860e-04
Loss = 1.2176e-02, PNorm = 139.2361, GNorm = 0.1994, lr_0 = 4.6828e-04
Loss = 9.6018e-03, PNorm = 139.2552, GNorm = 0.2915, lr_0 = 4.6796e-04
Loss = 8.9618e-03, PNorm = 139.2802, GNorm = 0.3920, lr_0 = 4.6764e-04
Loss = 1.0207e-02, PNorm = 139.3047, GNorm = 0.2292, lr_0 = 4.6732e-04
Loss = 1.2492e-02, PNorm = 139.3309, GNorm = 0.1998, lr_0 = 4.6700e-04
Loss = 9.9273e-03, PNorm = 139.3534, GNorm = 0.3028, lr_0 = 4.6668e-04
Loss = 1.0694e-02, PNorm = 139.3788, GNorm = 0.1687, lr_0 = 4.6636e-04
Loss = 1.0176e-02, PNorm = 139.4050, GNorm = 0.3343, lr_0 = 4.6604e-04
Loss = 1.0516e-02, PNorm = 139.4242, GNorm = 0.3854, lr_0 = 4.6572e-04
Loss = 9.8445e-03, PNorm = 139.4431, GNorm = 0.2189, lr_0 = 4.6540e-04
Loss = 9.4629e-03, PNorm = 139.4644, GNorm = 0.5177, lr_0 = 4.6508e-04
Loss = 1.3578e-02, PNorm = 139.4887, GNorm = 0.3131, lr_0 = 4.6476e-04
Loss = 1.0794e-02, PNorm = 139.5153, GNorm = 0.5870, lr_0 = 4.6445e-04
Loss = 9.7689e-03, PNorm = 139.5410, GNorm = 0.2570, lr_0 = 4.6413e-04
Loss = 9.5662e-03, PNorm = 139.5672, GNorm = 0.3921, lr_0 = 4.6381e-04
Loss = 9.0737e-03, PNorm = 139.5910, GNorm = 0.3325, lr_0 = 4.6349e-04
Loss = 1.0665e-02, PNorm = 139.6090, GNorm = 0.2145, lr_0 = 4.6317e-04
Loss = 7.9189e-03, PNorm = 139.6307, GNorm = 0.1550, lr_0 = 4.6286e-04
Loss = 1.0724e-02, PNorm = 139.6507, GNorm = 0.4031, lr_0 = 4.6254e-04
Loss = 1.0083e-02, PNorm = 139.6722, GNorm = 0.2379, lr_0 = 4.6222e-04
Loss = 1.2318e-02, PNorm = 139.6991, GNorm = 0.3601, lr_0 = 4.6191e-04
Loss = 1.2065e-02, PNorm = 139.7245, GNorm = 0.1795, lr_0 = 4.6159e-04
Loss = 1.0103e-02, PNorm = 139.7467, GNorm = 0.3023, lr_0 = 4.6127e-04
Loss = 1.0701e-02, PNorm = 139.7690, GNorm = 0.2812, lr_0 = 4.6096e-04
Loss = 9.8059e-03, PNorm = 139.7937, GNorm = 0.1703, lr_0 = 4.6064e-04
Loss = 1.0968e-02, PNorm = 139.8174, GNorm = 0.6007, lr_0 = 4.6033e-04
Loss = 1.0767e-02, PNorm = 139.8397, GNorm = 0.1139, lr_0 = 4.6001e-04
Loss = 8.2657e-03, PNorm = 139.8603, GNorm = 0.2602, lr_0 = 4.5970e-04
Loss = 8.9523e-03, PNorm = 139.8824, GNorm = 0.2898, lr_0 = 4.5938e-04
Loss = 7.8283e-03, PNorm = 139.9084, GNorm = 0.1345, lr_0 = 4.5907e-04
Loss = 1.1013e-02, PNorm = 139.9355, GNorm = 0.4380, lr_0 = 4.5875e-04
Loss = 1.0041e-02, PNorm = 139.9578, GNorm = 0.1986, lr_0 = 4.5844e-04
Loss = 1.0874e-02, PNorm = 139.9804, GNorm = 0.2454, lr_0 = 4.5812e-04
Loss = 9.3940e-03, PNorm = 140.0096, GNorm = 0.2092, lr_0 = 4.5781e-04
Loss = 9.4457e-03, PNorm = 140.0321, GNorm = 0.1712, lr_0 = 4.5750e-04
Loss = 9.5134e-03, PNorm = 140.0550, GNorm = 0.3100, lr_0 = 4.5718e-04
Loss = 1.0416e-02, PNorm = 140.0803, GNorm = 0.1727, lr_0 = 4.5687e-04
Loss = 1.0294e-02, PNorm = 140.1041, GNorm = 0.2352, lr_0 = 4.5656e-04
Loss = 9.6399e-03, PNorm = 140.1241, GNorm = 0.3098, lr_0 = 4.5624e-04
Loss = 1.0293e-02, PNorm = 140.1471, GNorm = 0.1343, lr_0 = 4.5593e-04
Loss = 1.0441e-02, PNorm = 140.1707, GNorm = 0.1172, lr_0 = 4.5562e-04
Loss = 8.6144e-03, PNorm = 140.1956, GNorm = 0.2191, lr_0 = 4.5531e-04
Loss = 9.3001e-03, PNorm = 140.2278, GNorm = 0.3296, lr_0 = 4.5499e-04
Loss = 1.0341e-02, PNorm = 140.2509, GNorm = 0.1542, lr_0 = 4.5468e-04
Loss = 8.4898e-03, PNorm = 140.2720, GNorm = 0.1407, lr_0 = 4.5437e-04
Loss = 1.0575e-02, PNorm = 140.3001, GNorm = 0.1212, lr_0 = 4.5406e-04
Loss = 8.8524e-03, PNorm = 140.3254, GNorm = 0.2292, lr_0 = 4.5375e-04
Loss = 8.9953e-03, PNorm = 140.3477, GNorm = 0.4620, lr_0 = 4.5344e-04
Loss = 1.1973e-02, PNorm = 140.3702, GNorm = 0.2518, lr_0 = 4.5313e-04
Loss = 8.9928e-03, PNorm = 140.3924, GNorm = 0.2751, lr_0 = 4.5282e-04
Loss = 9.4368e-03, PNorm = 140.4114, GNorm = 0.4333, lr_0 = 4.5251e-04
Loss = 1.2221e-02, PNorm = 140.4383, GNorm = 0.6338, lr_0 = 4.5220e-04
Loss = 1.0779e-02, PNorm = 140.4701, GNorm = 0.2601, lr_0 = 4.5189e-04
Loss = 8.9912e-03, PNorm = 140.4934, GNorm = 0.1661, lr_0 = 4.5158e-04
Loss = 1.2031e-02, PNorm = 140.5178, GNorm = 0.1005, lr_0 = 4.5127e-04
Loss = 1.1778e-02, PNorm = 140.5482, GNorm = 0.3557, lr_0 = 4.5096e-04
Loss = 9.5088e-03, PNorm = 140.5727, GNorm = 0.3859, lr_0 = 4.5065e-04
Loss = 1.2084e-02, PNorm = 140.5969, GNorm = 0.3233, lr_0 = 4.5034e-04
Loss = 1.0485e-02, PNorm = 140.6214, GNorm = 0.7868, lr_0 = 4.5003e-04
Loss = 9.5450e-03, PNorm = 140.6455, GNorm = 0.2589, lr_0 = 4.4972e-04
Loss = 9.6051e-03, PNorm = 140.6681, GNorm = 0.3514, lr_0 = 4.4942e-04
Loss = 1.1505e-02, PNorm = 140.6960, GNorm = 0.4881, lr_0 = 4.4911e-04
Loss = 1.1415e-02, PNorm = 140.7251, GNorm = 0.4652, lr_0 = 4.4880e-04
Loss = 9.6821e-03, PNorm = 140.7520, GNorm = 0.2399, lr_0 = 4.4849e-04
Loss = 8.7665e-03, PNorm = 140.7780, GNorm = 0.2037, lr_0 = 4.4819e-04
Loss = 1.0619e-02, PNorm = 140.8024, GNorm = 0.1860, lr_0 = 4.4788e-04
Loss = 7.4915e-03, PNorm = 140.8279, GNorm = 0.1327, lr_0 = 4.4757e-04
Loss = 1.4022e-02, PNorm = 140.8570, GNorm = 0.2248, lr_0 = 4.4727e-04
Loss = 1.0604e-02, PNorm = 140.8794, GNorm = 0.3032, lr_0 = 4.4696e-04
Loss = 8.3281e-03, PNorm = 140.9033, GNorm = 0.2166, lr_0 = 4.4665e-04
Loss = 1.1357e-02, PNorm = 140.9257, GNorm = 0.3468, lr_0 = 4.4635e-04
Loss = 9.5499e-03, PNorm = 140.9493, GNorm = 0.5559, lr_0 = 4.4604e-04
Loss = 9.9219e-03, PNorm = 140.9722, GNorm = 0.6184, lr_0 = 4.4574e-04
Loss = 1.1095e-02, PNorm = 140.9962, GNorm = 0.2862, lr_0 = 4.4543e-04
Loss = 1.0871e-02, PNorm = 141.0251, GNorm = 0.2969, lr_0 = 4.4513e-04
Loss = 1.1725e-02, PNorm = 141.0526, GNorm = 0.2430, lr_0 = 4.4482e-04
Loss = 9.7020e-03, PNorm = 141.0770, GNorm = 0.1344, lr_0 = 4.4452e-04
Loss = 1.0376e-02, PNorm = 141.0996, GNorm = 0.1681, lr_0 = 4.4421e-04
Loss = 1.0821e-02, PNorm = 141.1238, GNorm = 0.3913, lr_0 = 4.4391e-04
Loss = 8.0787e-03, PNorm = 141.1502, GNorm = 0.2537, lr_0 = 4.4360e-04
Loss = 1.1119e-02, PNorm = 141.1740, GNorm = 0.4991, lr_0 = 4.4330e-04
Loss = 1.1013e-02, PNorm = 141.1933, GNorm = 0.3456, lr_0 = 4.4299e-04
Loss = 1.1397e-02, PNorm = 141.2183, GNorm = 0.2974, lr_0 = 4.4269e-04
Loss = 1.1463e-02, PNorm = 141.2521, GNorm = 0.3736, lr_0 = 4.4239e-04
Loss = 8.0115e-03, PNorm = 141.2819, GNorm = 0.1335, lr_0 = 4.4209e-04
Loss = 1.2384e-02, PNorm = 141.3066, GNorm = 0.2832, lr_0 = 4.4178e-04
Loss = 9.5568e-03, PNorm = 141.3256, GNorm = 0.2777, lr_0 = 4.4148e-04
Loss = 1.1056e-02, PNorm = 141.3458, GNorm = 0.5215, lr_0 = 4.4118e-04
Loss = 1.2518e-02, PNorm = 141.3678, GNorm = 0.4028, lr_0 = 4.4088e-04
Loss = 1.2594e-02, PNorm = 141.3975, GNorm = 0.3037, lr_0 = 4.4057e-04
Loss = 8.5644e-03, PNorm = 141.4253, GNorm = 0.0848, lr_0 = 4.4027e-04
Loss = 1.0190e-02, PNorm = 141.4524, GNorm = 0.0878, lr_0 = 4.3997e-04
Loss = 7.7865e-03, PNorm = 141.4823, GNorm = 0.4616, lr_0 = 4.3967e-04
Loss = 8.7452e-03, PNorm = 141.5072, GNorm = 0.4561, lr_0 = 4.3937e-04
Validation mae = 0.481658
Epoch 12
Loss = 1.0246e-02, PNorm = 141.5260, GNorm = 0.1758, lr_0 = 4.3907e-04
Loss = 9.6635e-03, PNorm = 141.5427, GNorm = 0.4295, lr_0 = 4.3877e-04
Loss = 9.9743e-03, PNorm = 141.5564, GNorm = 0.2759, lr_0 = 4.3846e-04
Loss = 1.1211e-02, PNorm = 141.5713, GNorm = 0.3873, lr_0 = 4.3816e-04
Loss = 1.1974e-02, PNorm = 141.5875, GNorm = 0.2216, lr_0 = 4.3786e-04
Loss = 9.0136e-03, PNorm = 141.6116, GNorm = 0.1289, lr_0 = 4.3756e-04
Loss = 8.8574e-03, PNorm = 141.6307, GNorm = 0.4413, lr_0 = 4.3726e-04
Loss = 8.3550e-03, PNorm = 141.6501, GNorm = 0.2548, lr_0 = 4.3696e-04
Loss = 8.9925e-03, PNorm = 141.6691, GNorm = 0.1831, lr_0 = 4.3667e-04
Loss = 8.3867e-03, PNorm = 141.6903, GNorm = 0.1289, lr_0 = 4.3637e-04
Loss = 8.2910e-03, PNorm = 141.7082, GNorm = 0.2233, lr_0 = 4.3607e-04
Loss = 8.5841e-03, PNorm = 141.7239, GNorm = 0.1004, lr_0 = 4.3577e-04
Loss = 8.6369e-03, PNorm = 141.7397, GNorm = 0.1888, lr_0 = 4.3547e-04
Loss = 9.8965e-03, PNorm = 141.7589, GNorm = 0.1411, lr_0 = 4.3517e-04
Loss = 7.5753e-03, PNorm = 141.7735, GNorm = 0.3486, lr_0 = 4.3487e-04
Loss = 9.5957e-03, PNorm = 141.7871, GNorm = 0.3227, lr_0 = 4.3458e-04
Loss = 8.2585e-03, PNorm = 141.8042, GNorm = 0.3101, lr_0 = 4.3428e-04
Loss = 7.8259e-03, PNorm = 141.8182, GNorm = 0.1405, lr_0 = 4.3398e-04
Loss = 8.0904e-03, PNorm = 141.8317, GNorm = 0.2295, lr_0 = 4.3368e-04
Loss = 9.4303e-03, PNorm = 141.8456, GNorm = 0.3946, lr_0 = 4.3339e-04
Loss = 1.0577e-02, PNorm = 141.8666, GNorm = 0.1979, lr_0 = 4.3309e-04
Loss = 9.1668e-03, PNorm = 141.8871, GNorm = 0.3502, lr_0 = 4.3279e-04
Loss = 9.5526e-03, PNorm = 141.9084, GNorm = 0.1951, lr_0 = 4.3250e-04
Loss = 7.3406e-03, PNorm = 141.9269, GNorm = 0.3399, lr_0 = 4.3220e-04
Loss = 1.3921e-02, PNorm = 141.9442, GNorm = 0.1605, lr_0 = 4.3190e-04
Loss = 9.5246e-03, PNorm = 141.9612, GNorm = 0.2382, lr_0 = 4.3161e-04
Loss = 9.4713e-03, PNorm = 141.9843, GNorm = 0.3588, lr_0 = 4.3131e-04
Loss = 8.2452e-03, PNorm = 142.0073, GNorm = 0.3459, lr_0 = 4.3102e-04
Loss = 1.2533e-02, PNorm = 142.0247, GNorm = 0.9917, lr_0 = 4.3072e-04
Loss = 9.9227e-03, PNorm = 142.0444, GNorm = 0.2192, lr_0 = 4.3043e-04
Loss = 9.8656e-03, PNorm = 142.0608, GNorm = 0.1178, lr_0 = 4.3013e-04
Loss = 8.1519e-03, PNorm = 142.0828, GNorm = 0.1296, lr_0 = 4.2984e-04
Loss = 8.3550e-03, PNorm = 142.0990, GNorm = 0.2784, lr_0 = 4.2954e-04
Loss = 6.9829e-03, PNorm = 142.1151, GNorm = 0.3958, lr_0 = 4.2925e-04
Loss = 9.7284e-03, PNorm = 142.1297, GNorm = 0.3221, lr_0 = 4.2895e-04
Loss = 8.8117e-03, PNorm = 142.1428, GNorm = 0.3039, lr_0 = 4.2866e-04
Loss = 6.6853e-03, PNorm = 142.1593, GNorm = 0.2205, lr_0 = 4.2837e-04
Loss = 1.2318e-02, PNorm = 142.1777, GNorm = 0.1516, lr_0 = 4.2807e-04
Loss = 9.5473e-03, PNorm = 142.2059, GNorm = 0.1880, lr_0 = 4.2778e-04
Loss = 7.1192e-03, PNorm = 142.2293, GNorm = 0.2038, lr_0 = 4.2749e-04
Loss = 1.0020e-02, PNorm = 142.2465, GNorm = 0.2388, lr_0 = 4.2719e-04
Loss = 7.9488e-03, PNorm = 142.2625, GNorm = 0.2439, lr_0 = 4.2690e-04
Loss = 7.2283e-03, PNorm = 142.2829, GNorm = 0.2655, lr_0 = 4.2661e-04
Loss = 8.9368e-03, PNorm = 142.2987, GNorm = 0.3879, lr_0 = 4.2632e-04
Loss = 8.0992e-03, PNorm = 142.3199, GNorm = 0.3727, lr_0 = 4.2602e-04
Loss = 7.9088e-03, PNorm = 142.3378, GNorm = 0.2293, lr_0 = 4.2573e-04
Loss = 1.1068e-02, PNorm = 142.3563, GNorm = 0.4381, lr_0 = 4.2544e-04
Loss = 1.0681e-02, PNorm = 142.3777, GNorm = 0.4196, lr_0 = 4.2515e-04
Loss = 8.2732e-03, PNorm = 142.4038, GNorm = 0.3778, lr_0 = 4.2486e-04
Loss = 8.4512e-03, PNorm = 142.4219, GNorm = 0.2739, lr_0 = 4.2457e-04
Loss = 8.3220e-03, PNorm = 142.4412, GNorm = 0.2579, lr_0 = 4.2428e-04
Loss = 8.9688e-03, PNorm = 142.4596, GNorm = 0.1840, lr_0 = 4.2399e-04
Loss = 9.3849e-03, PNorm = 142.4774, GNorm = 0.2753, lr_0 = 4.2370e-04
Loss = 9.2871e-03, PNorm = 142.4952, GNorm = 0.2225, lr_0 = 4.2340e-04
Loss = 9.3706e-03, PNorm = 142.5159, GNorm = 0.4904, lr_0 = 4.2311e-04
Loss = 9.2833e-03, PNorm = 142.5389, GNorm = 0.2220, lr_0 = 4.2283e-04
Loss = 6.8040e-03, PNorm = 142.5565, GNorm = 0.0782, lr_0 = 4.2254e-04
Loss = 7.9733e-03, PNorm = 142.5759, GNorm = 0.3311, lr_0 = 4.2225e-04
Loss = 9.7147e-03, PNorm = 142.5978, GNorm = 0.3630, lr_0 = 4.2196e-04
Loss = 8.5135e-03, PNorm = 142.6213, GNorm = 0.2686, lr_0 = 4.2167e-04
Loss = 7.6970e-03, PNorm = 142.6408, GNorm = 0.0895, lr_0 = 4.2138e-04
Loss = 9.5260e-03, PNorm = 142.6631, GNorm = 0.3581, lr_0 = 4.2109e-04
Loss = 7.8850e-03, PNorm = 142.6863, GNorm = 0.2544, lr_0 = 4.2080e-04
Loss = 7.8637e-03, PNorm = 142.7033, GNorm = 0.2001, lr_0 = 4.2051e-04
Loss = 7.1304e-03, PNorm = 142.7227, GNorm = 0.1679, lr_0 = 4.2023e-04
Loss = 1.0830e-02, PNorm = 142.7402, GNorm = 0.2165, lr_0 = 4.1994e-04
Loss = 1.1739e-02, PNorm = 142.7565, GNorm = 0.3402, lr_0 = 4.1965e-04
Loss = 8.8559e-03, PNorm = 142.7724, GNorm = 0.1827, lr_0 = 4.1936e-04
Loss = 8.2383e-03, PNorm = 142.7880, GNorm = 0.3077, lr_0 = 4.1907e-04
Loss = 1.0649e-02, PNorm = 142.8057, GNorm = 0.2196, lr_0 = 4.1879e-04
Loss = 7.2038e-03, PNorm = 142.8238, GNorm = 0.0879, lr_0 = 4.1850e-04
Loss = 8.6195e-03, PNorm = 142.8408, GNorm = 0.0867, lr_0 = 4.1821e-04
Loss = 1.0254e-02, PNorm = 142.8586, GNorm = 0.2434, lr_0 = 4.1793e-04
Loss = 9.5367e-03, PNorm = 142.8810, GNorm = 0.2387, lr_0 = 4.1764e-04
Loss = 9.5717e-03, PNorm = 142.9065, GNorm = 0.4803, lr_0 = 4.1736e-04
Loss = 9.6082e-03, PNorm = 142.9271, GNorm = 0.2090, lr_0 = 4.1707e-04
Loss = 7.1023e-03, PNorm = 142.9463, GNorm = 0.1874, lr_0 = 4.1678e-04
Loss = 8.5346e-03, PNorm = 142.9654, GNorm = 0.4735, lr_0 = 4.1650e-04
Loss = 8.8068e-03, PNorm = 142.9880, GNorm = 0.3783, lr_0 = 4.1621e-04
Loss = 8.2656e-03, PNorm = 143.0093, GNorm = 0.1404, lr_0 = 4.1593e-04
Loss = 7.4396e-03, PNorm = 143.0273, GNorm = 0.2198, lr_0 = 4.1564e-04
Loss = 7.9243e-03, PNorm = 143.0483, GNorm = 0.2257, lr_0 = 4.1536e-04
Loss = 9.1687e-03, PNorm = 143.0676, GNorm = 0.4340, lr_0 = 4.1507e-04
Loss = 9.0311e-03, PNorm = 143.0817, GNorm = 0.1617, lr_0 = 4.1479e-04
Loss = 8.2076e-03, PNorm = 143.1042, GNorm = 0.2370, lr_0 = 4.1450e-04
Loss = 6.9097e-03, PNorm = 143.1251, GNorm = 0.0891, lr_0 = 4.1422e-04
Loss = 8.7980e-03, PNorm = 143.1472, GNorm = 0.2597, lr_0 = 4.1394e-04
Loss = 9.9175e-03, PNorm = 143.1719, GNorm = 0.2427, lr_0 = 4.1365e-04
Loss = 7.7165e-03, PNorm = 143.1928, GNorm = 0.2053, lr_0 = 4.1337e-04
Loss = 7.8430e-03, PNorm = 143.2134, GNorm = 0.2456, lr_0 = 4.1309e-04
Loss = 1.0203e-02, PNorm = 143.2297, GNorm = 0.2067, lr_0 = 4.1280e-04
Loss = 6.7771e-03, PNorm = 143.2465, GNorm = 0.1871, lr_0 = 4.1252e-04
Loss = 8.6277e-03, PNorm = 143.2614, GNorm = 0.2918, lr_0 = 4.1224e-04
Loss = 9.0243e-03, PNorm = 143.2848, GNorm = 0.2010, lr_0 = 4.1196e-04
Loss = 1.1316e-02, PNorm = 143.3027, GNorm = 0.3609, lr_0 = 4.1167e-04
Loss = 9.0253e-03, PNorm = 143.3224, GNorm = 0.1682, lr_0 = 4.1139e-04
Loss = 7.3029e-03, PNorm = 143.3446, GNorm = 0.1770, lr_0 = 4.1111e-04
Loss = 7.6286e-03, PNorm = 143.3667, GNorm = 0.1105, lr_0 = 4.1083e-04
Loss = 7.9062e-03, PNorm = 143.3857, GNorm = 0.2139, lr_0 = 4.1055e-04
Loss = 8.0722e-03, PNorm = 143.4085, GNorm = 0.1582, lr_0 = 4.1027e-04
Loss = 9.0689e-03, PNorm = 143.4256, GNorm = 0.5060, lr_0 = 4.0998e-04
Loss = 7.7439e-03, PNorm = 143.4489, GNorm = 0.2292, lr_0 = 4.0970e-04
Loss = 9.3878e-03, PNorm = 143.4649, GNorm = 0.2852, lr_0 = 4.0942e-04
Loss = 8.5385e-03, PNorm = 143.4850, GNorm = 0.2779, lr_0 = 4.0914e-04
Loss = 8.2905e-03, PNorm = 143.5052, GNorm = 0.5167, lr_0 = 4.0886e-04
Loss = 8.7995e-03, PNorm = 143.5274, GNorm = 0.2313, lr_0 = 4.0858e-04
Loss = 1.0382e-02, PNorm = 143.5458, GNorm = 0.1050, lr_0 = 4.0830e-04
Loss = 9.7872e-03, PNorm = 143.5683, GNorm = 0.4564, lr_0 = 4.0802e-04
Loss = 8.4622e-03, PNorm = 143.5934, GNorm = 0.3907, lr_0 = 4.0774e-04
Loss = 7.3918e-03, PNorm = 143.6154, GNorm = 0.3174, lr_0 = 4.0746e-04
Loss = 8.1129e-03, PNorm = 143.6328, GNorm = 0.1643, lr_0 = 4.0718e-04
Loss = 7.5954e-03, PNorm = 143.6536, GNorm = 0.4110, lr_0 = 4.0691e-04
Loss = 7.8034e-03, PNorm = 143.6722, GNorm = 0.1860, lr_0 = 4.0663e-04
Loss = 7.9800e-03, PNorm = 143.6923, GNorm = 0.1229, lr_0 = 4.0635e-04
Loss = 9.7151e-03, PNorm = 143.7119, GNorm = 0.2615, lr_0 = 4.0607e-04
Loss = 7.7276e-03, PNorm = 143.7306, GNorm = 0.2608, lr_0 = 4.0579e-04
Loss = 8.8071e-03, PNorm = 143.7545, GNorm = 0.1697, lr_0 = 4.0551e-04
Loss = 7.5163e-03, PNorm = 143.7764, GNorm = 0.4002, lr_0 = 4.0524e-04
Loss = 9.4116e-03, PNorm = 143.7953, GNorm = 0.6252, lr_0 = 4.0496e-04
Loss = 8.8260e-03, PNorm = 143.8151, GNorm = 0.2170, lr_0 = 4.0468e-04
Validation mae = 0.480128
Epoch 13
Loss = 8.9706e-03, PNorm = 143.8322, GNorm = 0.1015, lr_0 = 4.0440e-04
Loss = 7.5788e-03, PNorm = 143.8426, GNorm = 0.1721, lr_0 = 4.0413e-04
Loss = 8.6641e-03, PNorm = 143.8559, GNorm = 0.2962, lr_0 = 4.0385e-04
Loss = 7.4914e-03, PNorm = 143.8716, GNorm = 0.1073, lr_0 = 4.0357e-04
Loss = 9.2577e-03, PNorm = 143.8853, GNorm = 0.2576, lr_0 = 4.0330e-04
Loss = 7.4142e-03, PNorm = 143.8953, GNorm = 0.4406, lr_0 = 4.0302e-04
Loss = 7.2779e-03, PNorm = 143.9059, GNorm = 0.2382, lr_0 = 4.0274e-04
Loss = 8.6536e-03, PNorm = 143.9203, GNorm = 0.4506, lr_0 = 4.0247e-04
Loss = 7.9569e-03, PNorm = 143.9382, GNorm = 0.1964, lr_0 = 4.0219e-04
Loss = 6.8408e-03, PNorm = 143.9532, GNorm = 0.4078, lr_0 = 4.0192e-04
Loss = 8.0807e-03, PNorm = 143.9671, GNorm = 0.2593, lr_0 = 4.0164e-04
Loss = 7.0694e-03, PNorm = 143.9817, GNorm = 0.2278, lr_0 = 4.0137e-04
Loss = 6.3879e-03, PNorm = 143.9985, GNorm = 0.2547, lr_0 = 4.0109e-04
Loss = 8.6177e-03, PNorm = 144.0162, GNorm = 0.3821, lr_0 = 4.0082e-04
Loss = 6.2544e-03, PNorm = 144.0338, GNorm = 0.1103, lr_0 = 4.0054e-04
Loss = 8.4422e-03, PNorm = 144.0479, GNorm = 0.3959, lr_0 = 4.0027e-04
Loss = 7.1836e-03, PNorm = 144.0588, GNorm = 0.3152, lr_0 = 3.9999e-04
Loss = 7.6383e-03, PNorm = 144.0757, GNorm = 0.4020, lr_0 = 3.9972e-04
Loss = 6.9366e-03, PNorm = 144.0949, GNorm = 0.2677, lr_0 = 3.9945e-04
Loss = 6.4682e-03, PNorm = 144.1113, GNorm = 0.1464, lr_0 = 3.9917e-04
Loss = 6.6111e-03, PNorm = 144.1210, GNorm = 0.3581, lr_0 = 3.9890e-04
Loss = 6.6688e-03, PNorm = 144.1320, GNorm = 0.2016, lr_0 = 3.9863e-04
Loss = 7.4267e-03, PNorm = 144.1454, GNorm = 0.1095, lr_0 = 3.9835e-04
Loss = 7.8698e-03, PNorm = 144.1614, GNorm = 0.1514, lr_0 = 3.9808e-04
Loss = 6.4904e-03, PNorm = 144.1748, GNorm = 0.3470, lr_0 = 3.9781e-04
Loss = 7.5642e-03, PNorm = 144.1925, GNorm = 0.1966, lr_0 = 3.9753e-04
Loss = 6.5062e-03, PNorm = 144.2087, GNorm = 0.2988, lr_0 = 3.9726e-04
Loss = 6.0925e-03, PNorm = 144.2221, GNorm = 0.2397, lr_0 = 3.9699e-04
Loss = 5.5392e-03, PNorm = 144.2353, GNorm = 0.2060, lr_0 = 3.9672e-04
Loss = 7.1136e-03, PNorm = 144.2565, GNorm = 0.0811, lr_0 = 3.9645e-04
Loss = 6.8282e-03, PNorm = 144.2701, GNorm = 0.2842, lr_0 = 3.9617e-04
Loss = 6.7251e-03, PNorm = 144.2849, GNorm = 0.4128, lr_0 = 3.9590e-04
Loss = 7.0180e-03, PNorm = 144.2932, GNorm = 0.3657, lr_0 = 3.9563e-04
Loss = 7.6410e-03, PNorm = 144.3095, GNorm = 0.1834, lr_0 = 3.9536e-04
Loss = 7.2913e-03, PNorm = 144.3302, GNorm = 0.3663, lr_0 = 3.9509e-04
Loss = 8.8681e-03, PNorm = 144.3469, GNorm = 0.2715, lr_0 = 3.9482e-04
Loss = 6.1124e-03, PNorm = 144.3668, GNorm = 0.2360, lr_0 = 3.9455e-04
Loss = 7.1220e-03, PNorm = 144.3825, GNorm = 0.3464, lr_0 = 3.9428e-04
Loss = 5.6921e-03, PNorm = 144.3961, GNorm = 0.1105, lr_0 = 3.9401e-04
Loss = 6.6146e-03, PNorm = 144.4119, GNorm = 0.2863, lr_0 = 3.9374e-04
Loss = 7.8226e-03, PNorm = 144.4268, GNorm = 0.2266, lr_0 = 3.9347e-04
Loss = 6.2043e-03, PNorm = 144.4427, GNorm = 0.1686, lr_0 = 3.9320e-04
Loss = 8.0633e-03, PNorm = 144.4597, GNorm = 0.2193, lr_0 = 3.9293e-04
Loss = 6.3016e-03, PNorm = 144.4757, GNorm = 0.3882, lr_0 = 3.9266e-04
Loss = 6.6563e-03, PNorm = 144.4926, GNorm = 0.2188, lr_0 = 3.9239e-04
Loss = 7.2802e-03, PNorm = 144.5056, GNorm = 0.1766, lr_0 = 3.9212e-04
Loss = 7.9598e-03, PNorm = 144.5245, GNorm = 0.2371, lr_0 = 3.9185e-04
Loss = 8.4738e-03, PNorm = 144.5417, GNorm = 0.2855, lr_0 = 3.9159e-04
Loss = 7.6084e-03, PNorm = 144.5592, GNorm = 0.2515, lr_0 = 3.9132e-04
Loss = 6.8860e-03, PNorm = 144.5785, GNorm = 0.2106, lr_0 = 3.9105e-04
Loss = 8.3866e-03, PNorm = 144.5970, GNorm = 0.2865, lr_0 = 3.9078e-04
Loss = 6.8127e-03, PNorm = 144.6142, GNorm = 0.2870, lr_0 = 3.9051e-04
Loss = 8.8106e-03, PNorm = 144.6297, GNorm = 0.3423, lr_0 = 3.9025e-04
Loss = 7.0930e-03, PNorm = 144.6477, GNorm = 0.0823, lr_0 = 3.8998e-04
Loss = 6.0298e-03, PNorm = 144.6638, GNorm = 0.1632, lr_0 = 3.8971e-04
Loss = 1.2399e-02, PNorm = 144.6791, GNorm = 0.3154, lr_0 = 3.8945e-04
Loss = 6.6352e-03, PNorm = 144.6951, GNorm = 0.4706, lr_0 = 3.8918e-04
Loss = 7.8982e-03, PNorm = 144.7141, GNorm = 0.2016, lr_0 = 3.8891e-04
Loss = 7.5132e-03, PNorm = 144.7338, GNorm = 0.1474, lr_0 = 3.8865e-04
Loss = 6.2868e-03, PNorm = 144.7503, GNorm = 0.2449, lr_0 = 3.8838e-04
Loss = 8.0527e-03, PNorm = 144.7605, GNorm = 0.1939, lr_0 = 3.8811e-04
Loss = 8.0673e-03, PNorm = 144.7710, GNorm = 0.2279, lr_0 = 3.8785e-04
Loss = 6.1077e-03, PNorm = 144.7834, GNorm = 0.2078, lr_0 = 3.8758e-04
Loss = 5.8277e-03, PNorm = 144.7918, GNorm = 0.3117, lr_0 = 3.8732e-04
Loss = 7.6672e-03, PNorm = 144.8091, GNorm = 0.3769, lr_0 = 3.8705e-04
Loss = 6.6430e-03, PNorm = 144.8266, GNorm = 0.1513, lr_0 = 3.8679e-04
Loss = 8.3869e-03, PNorm = 144.8416, GNorm = 0.1392, lr_0 = 3.8652e-04
Loss = 7.6835e-03, PNorm = 144.8597, GNorm = 0.3010, lr_0 = 3.8626e-04
Loss = 8.2247e-03, PNorm = 144.8793, GNorm = 0.2597, lr_0 = 3.8599e-04
Loss = 9.3793e-03, PNorm = 144.9016, GNorm = 0.1490, lr_0 = 3.8573e-04
Loss = 6.8172e-03, PNorm = 144.9233, GNorm = 0.4290, lr_0 = 3.8546e-04
Loss = 6.5934e-03, PNorm = 144.9394, GNorm = 0.1830, lr_0 = 3.8520e-04
Loss = 7.6403e-03, PNorm = 144.9533, GNorm = 0.7390, lr_0 = 3.8493e-04
Loss = 5.4210e-03, PNorm = 144.9659, GNorm = 0.2197, lr_0 = 3.8467e-04
Loss = 6.8522e-03, PNorm = 144.9826, GNorm = 0.2893, lr_0 = 3.8441e-04
Loss = 7.9600e-03, PNorm = 144.9968, GNorm = 0.1432, lr_0 = 3.8414e-04
Loss = 7.8538e-03, PNorm = 145.0120, GNorm = 0.1405, lr_0 = 3.8388e-04
Loss = 6.3873e-03, PNorm = 145.0262, GNorm = 0.3563, lr_0 = 3.8362e-04
Loss = 7.4896e-03, PNorm = 145.0440, GNorm = 0.4452, lr_0 = 3.8336e-04
Loss = 6.7143e-03, PNorm = 145.0638, GNorm = 0.1156, lr_0 = 3.8309e-04
Loss = 7.0868e-03, PNorm = 145.0783, GNorm = 0.3910, lr_0 = 3.8283e-04
Loss = 6.4742e-03, PNorm = 145.0927, GNorm = 0.6246, lr_0 = 3.8257e-04
Loss = 8.3144e-03, PNorm = 145.1105, GNorm = 0.1497, lr_0 = 3.8231e-04
Loss = 6.5732e-03, PNorm = 145.1275, GNorm = 0.2504, lr_0 = 3.8204e-04
Loss = 8.1423e-03, PNorm = 145.1460, GNorm = 0.2378, lr_0 = 3.8178e-04
Loss = 7.7192e-03, PNorm = 145.1633, GNorm = 0.1674, lr_0 = 3.8152e-04
Loss = 8.1481e-03, PNorm = 145.1831, GNorm = 0.2945, lr_0 = 3.8126e-04
Loss = 7.1301e-03, PNorm = 145.1996, GNorm = 0.1802, lr_0 = 3.8100e-04
Loss = 7.7318e-03, PNorm = 145.2144, GNorm = 0.2250, lr_0 = 3.8074e-04
Loss = 6.2095e-03, PNorm = 145.2311, GNorm = 0.2731, lr_0 = 3.8048e-04
Loss = 8.4307e-03, PNorm = 145.2449, GNorm = 0.1042, lr_0 = 3.8022e-04
Loss = 6.1527e-03, PNorm = 145.2614, GNorm = 0.2949, lr_0 = 3.7995e-04
Loss = 5.4690e-03, PNorm = 145.2794, GNorm = 0.5574, lr_0 = 3.7969e-04
Loss = 7.4053e-03, PNorm = 145.2985, GNorm = 0.2910, lr_0 = 3.7943e-04
Loss = 1.0833e-02, PNorm = 145.3147, GNorm = 0.5444, lr_0 = 3.7917e-04
Loss = 7.0947e-03, PNorm = 145.3346, GNorm = 0.4055, lr_0 = 3.7891e-04
Loss = 6.0803e-03, PNorm = 145.3583, GNorm = 0.1100, lr_0 = 3.7866e-04
Loss = 6.9433e-03, PNorm = 145.3750, GNorm = 0.1784, lr_0 = 3.7840e-04
Loss = 6.5407e-03, PNorm = 145.3919, GNorm = 0.5951, lr_0 = 3.7814e-04
Loss = 6.8595e-03, PNorm = 145.4081, GNorm = 0.6641, lr_0 = 3.7788e-04
Loss = 6.0345e-03, PNorm = 145.4278, GNorm = 0.1495, lr_0 = 3.7762e-04
Loss = 6.5034e-03, PNorm = 145.4455, GNorm = 0.1881, lr_0 = 3.7736e-04
Loss = 6.3476e-03, PNorm = 145.4629, GNorm = 0.2395, lr_0 = 3.7710e-04
Loss = 7.0840e-03, PNorm = 145.4780, GNorm = 0.1761, lr_0 = 3.7684e-04
Loss = 9.4247e-03, PNorm = 145.4914, GNorm = 0.0611, lr_0 = 3.7659e-04
Loss = 6.6957e-03, PNorm = 145.5069, GNorm = 0.3574, lr_0 = 3.7633e-04
Loss = 9.9973e-03, PNorm = 145.5286, GNorm = 0.4012, lr_0 = 3.7607e-04
Loss = 8.3870e-03, PNorm = 145.5482, GNorm = 0.2475, lr_0 = 3.7581e-04
Loss = 6.5250e-03, PNorm = 145.5669, GNorm = 0.0822, lr_0 = 3.7555e-04
Loss = 7.8064e-03, PNorm = 145.5882, GNorm = 0.2540, lr_0 = 3.7530e-04
Loss = 6.9358e-03, PNorm = 145.6068, GNorm = 0.3199, lr_0 = 3.7504e-04
Loss = 1.0375e-02, PNorm = 145.6243, GNorm = 0.0763, lr_0 = 3.7478e-04
Loss = 7.9349e-03, PNorm = 145.6441, GNorm = 0.1648, lr_0 = 3.7453e-04
Loss = 8.6473e-03, PNorm = 145.6665, GNorm = 0.3118, lr_0 = 3.7427e-04
Loss = 6.3350e-03, PNorm = 145.6853, GNorm = 0.2426, lr_0 = 3.7401e-04
Loss = 7.2245e-03, PNorm = 145.6992, GNorm = 0.4360, lr_0 = 3.7376e-04
Loss = 6.1155e-03, PNorm = 145.7107, GNorm = 0.1134, lr_0 = 3.7350e-04
Loss = 7.1417e-03, PNorm = 145.7278, GNorm = 0.0833, lr_0 = 3.7325e-04
Loss = 1.2291e-02, PNorm = 145.7450, GNorm = 0.2716, lr_0 = 3.7299e-04
Loss = 6.1268e-03, PNorm = 145.7658, GNorm = 0.2329, lr_0 = 3.7273e-04
Validation mae = 0.477867
Epoch 14
Loss = 5.8933e-03, PNorm = 145.7810, GNorm = 0.1950, lr_0 = 3.7248e-04
Loss = 6.4467e-03, PNorm = 145.7937, GNorm = 0.6236, lr_0 = 3.7222e-04
Loss = 6.8234e-03, PNorm = 145.8029, GNorm = 0.1677, lr_0 = 3.7197e-04
Loss = 6.7139e-03, PNorm = 145.8145, GNorm = 0.1808, lr_0 = 3.7171e-04
Loss = 8.0228e-03, PNorm = 145.8258, GNorm = 0.6457, lr_0 = 3.7146e-04
Loss = 6.5241e-03, PNorm = 145.8400, GNorm = 0.5615, lr_0 = 3.7120e-04
Loss = 8.7180e-03, PNorm = 145.8514, GNorm = 0.1307, lr_0 = 3.7095e-04
Loss = 6.9220e-03, PNorm = 145.8646, GNorm = 0.1046, lr_0 = 3.7070e-04
Loss = 7.1239e-03, PNorm = 145.8781, GNorm = 0.2225, lr_0 = 3.7044e-04
Loss = 5.6997e-03, PNorm = 145.8907, GNorm = 0.1662, lr_0 = 3.7019e-04
Loss = 5.6914e-03, PNorm = 145.9013, GNorm = 0.1279, lr_0 = 3.6993e-04
Loss = 5.5399e-03, PNorm = 145.9101, GNorm = 0.1220, lr_0 = 3.6968e-04
Loss = 5.5950e-03, PNorm = 145.9200, GNorm = 0.3801, lr_0 = 3.6943e-04
Loss = 5.8911e-03, PNorm = 145.9327, GNorm = 0.5277, lr_0 = 3.6917e-04
Loss = 9.2318e-03, PNorm = 145.9472, GNorm = 0.1985, lr_0 = 3.6892e-04
Loss = 6.7154e-03, PNorm = 145.9653, GNorm = 0.4594, lr_0 = 3.6867e-04
Loss = 5.2701e-03, PNorm = 145.9768, GNorm = 0.1328, lr_0 = 3.6842e-04
Loss = 5.5988e-03, PNorm = 145.9917, GNorm = 0.0847, lr_0 = 3.6816e-04
Loss = 5.7001e-03, PNorm = 146.0061, GNorm = 0.1642, lr_0 = 3.6791e-04
Loss = 5.9260e-03, PNorm = 146.0161, GNorm = 0.3620, lr_0 = 3.6766e-04
Loss = 5.2854e-03, PNorm = 146.0257, GNorm = 0.1098, lr_0 = 3.6741e-04
Loss = 5.4086e-03, PNorm = 146.0364, GNorm = 0.2192, lr_0 = 3.6716e-04
Loss = 5.8811e-03, PNorm = 146.0509, GNorm = 0.3191, lr_0 = 3.6690e-04
Loss = 8.8510e-03, PNorm = 146.0662, GNorm = 0.1844, lr_0 = 3.6665e-04
Loss = 7.2980e-03, PNorm = 146.0792, GNorm = 0.0742, lr_0 = 3.6640e-04
Loss = 5.9067e-03, PNorm = 146.0926, GNorm = 0.1870, lr_0 = 3.6615e-04
Loss = 5.4674e-03, PNorm = 146.1023, GNorm = 0.1229, lr_0 = 3.6590e-04
Loss = 8.9270e-03, PNorm = 146.1143, GNorm = 0.1993, lr_0 = 3.6565e-04
Loss = 6.6935e-03, PNorm = 146.1321, GNorm = 0.1919, lr_0 = 3.6540e-04
Loss = 5.9240e-03, PNorm = 146.1414, GNorm = 0.2112, lr_0 = 3.6515e-04
Loss = 5.8242e-03, PNorm = 146.1543, GNorm = 0.1034, lr_0 = 3.6490e-04
Loss = 7.5761e-03, PNorm = 146.1682, GNorm = 0.5322, lr_0 = 3.6465e-04
Loss = 5.5819e-03, PNorm = 146.1786, GNorm = 0.2365, lr_0 = 3.6440e-04
Loss = 5.3625e-03, PNorm = 146.1932, GNorm = 0.1633, lr_0 = 3.6415e-04
Loss = 6.0339e-03, PNorm = 146.2059, GNorm = 0.3401, lr_0 = 3.6390e-04
Loss = 8.4684e-03, PNorm = 146.2206, GNorm = 0.8434, lr_0 = 3.6365e-04
Loss = 6.2905e-03, PNorm = 146.2344, GNorm = 0.1942, lr_0 = 3.6340e-04
Loss = 7.1762e-03, PNorm = 146.2447, GNorm = 0.4926, lr_0 = 3.6315e-04
Loss = 8.1382e-03, PNorm = 146.2576, GNorm = 0.2266, lr_0 = 3.6290e-04
Loss = 5.2208e-03, PNorm = 146.2729, GNorm = 0.3285, lr_0 = 3.6266e-04
Loss = 9.3786e-03, PNorm = 146.2843, GNorm = 0.1579, lr_0 = 3.6241e-04
Loss = 6.6696e-03, PNorm = 146.2958, GNorm = 0.4132, lr_0 = 3.6216e-04
Loss = 6.0051e-03, PNorm = 146.3058, GNorm = 0.2251, lr_0 = 3.6191e-04
Loss = 6.1704e-03, PNorm = 146.3179, GNorm = 0.2866, lr_0 = 3.6166e-04
Loss = 5.7333e-03, PNorm = 146.3332, GNorm = 0.0906, lr_0 = 3.6141e-04
Loss = 5.8023e-03, PNorm = 146.3484, GNorm = 0.3553, lr_0 = 3.6117e-04
Loss = 6.4519e-03, PNorm = 146.3604, GNorm = 0.1517, lr_0 = 3.6092e-04
Loss = 9.4020e-03, PNorm = 146.3734, GNorm = 0.2885, lr_0 = 3.6067e-04
Loss = 5.6409e-03, PNorm = 146.3882, GNorm = 0.0817, lr_0 = 3.6043e-04
Loss = 5.5880e-03, PNorm = 146.4010, GNorm = 0.1377, lr_0 = 3.6018e-04
Loss = 5.6480e-03, PNorm = 146.4126, GNorm = 0.1036, lr_0 = 3.5993e-04
Loss = 4.7492e-03, PNorm = 146.4261, GNorm = 0.5419, lr_0 = 3.5969e-04
Loss = 5.9835e-03, PNorm = 146.4434, GNorm = 0.1294, lr_0 = 3.5944e-04
Loss = 5.4934e-03, PNorm = 146.4578, GNorm = 0.2390, lr_0 = 3.5919e-04
Loss = 5.9262e-03, PNorm = 146.4708, GNorm = 0.1731, lr_0 = 3.5895e-04
Loss = 6.6115e-03, PNorm = 146.4849, GNorm = 0.2581, lr_0 = 3.5870e-04
Loss = 6.0000e-03, PNorm = 146.5008, GNorm = 0.5828, lr_0 = 3.5845e-04
Loss = 6.1325e-03, PNorm = 146.5111, GNorm = 0.4161, lr_0 = 3.5821e-04
Loss = 5.1882e-03, PNorm = 146.5221, GNorm = 0.3630, lr_0 = 3.5796e-04
Loss = 6.3599e-03, PNorm = 146.5344, GNorm = 0.3431, lr_0 = 3.5772e-04
Loss = 5.6397e-03, PNorm = 146.5471, GNorm = 0.2185, lr_0 = 3.5747e-04
Loss = 5.9299e-03, PNorm = 146.5599, GNorm = 0.2177, lr_0 = 3.5723e-04
Loss = 4.8236e-03, PNorm = 146.5731, GNorm = 0.1933, lr_0 = 3.5698e-04
Loss = 7.0627e-03, PNorm = 146.5865, GNorm = 0.1036, lr_0 = 3.5674e-04
Loss = 6.7495e-03, PNorm = 146.6011, GNorm = 0.1476, lr_0 = 3.5650e-04
Loss = 5.8098e-03, PNorm = 146.6152, GNorm = 0.3513, lr_0 = 3.5625e-04
Loss = 5.8791e-03, PNorm = 146.6298, GNorm = 0.2586, lr_0 = 3.5601e-04
Loss = 6.3279e-03, PNorm = 146.6444, GNorm = 0.3272, lr_0 = 3.5576e-04
Loss = 5.9990e-03, PNorm = 146.6604, GNorm = 0.2799, lr_0 = 3.5552e-04
Loss = 6.7057e-03, PNorm = 146.6737, GNorm = 0.0960, lr_0 = 3.5528e-04
Loss = 5.8474e-03, PNorm = 146.6840, GNorm = 0.0896, lr_0 = 3.5503e-04
Loss = 5.0297e-03, PNorm = 146.6954, GNorm = 0.1203, lr_0 = 3.5479e-04
Loss = 5.2274e-03, PNorm = 146.7114, GNorm = 0.1255, lr_0 = 3.5455e-04
Loss = 7.5665e-03, PNorm = 146.7269, GNorm = 0.1302, lr_0 = 3.5430e-04
Loss = 6.6939e-03, PNorm = 146.7425, GNorm = 0.0829, lr_0 = 3.5406e-04
Loss = 5.6385e-03, PNorm = 146.7560, GNorm = 0.2488, lr_0 = 3.5382e-04
Loss = 5.4111e-03, PNorm = 146.7696, GNorm = 0.2503, lr_0 = 3.5358e-04
Loss = 6.3962e-03, PNorm = 146.7846, GNorm = 0.2237, lr_0 = 3.5333e-04
Loss = 6.4443e-03, PNorm = 146.8001, GNorm = 0.2242, lr_0 = 3.5309e-04
Loss = 6.6334e-03, PNorm = 146.8114, GNorm = 0.0906, lr_0 = 3.5285e-04
Loss = 7.1031e-03, PNorm = 146.8211, GNorm = 0.5803, lr_0 = 3.5261e-04
Loss = 5.0433e-03, PNorm = 146.8370, GNorm = 0.0700, lr_0 = 3.5237e-04
Loss = 5.0075e-03, PNorm = 146.8556, GNorm = 0.1360, lr_0 = 3.5212e-04
Loss = 4.9287e-03, PNorm = 146.8681, GNorm = 0.1828, lr_0 = 3.5188e-04
Loss = 6.4811e-03, PNorm = 146.8778, GNorm = 0.1954, lr_0 = 3.5164e-04
Loss = 6.6731e-03, PNorm = 146.8933, GNorm = 0.4681, lr_0 = 3.5140e-04
Loss = 6.7614e-03, PNorm = 146.9042, GNorm = 0.4361, lr_0 = 3.5116e-04
Loss = 5.5392e-03, PNorm = 146.9204, GNorm = 0.1603, lr_0 = 3.5092e-04
Loss = 9.2923e-03, PNorm = 146.9368, GNorm = 0.1455, lr_0 = 3.5068e-04
Loss = 6.2827e-03, PNorm = 146.9518, GNorm = 0.1838, lr_0 = 3.5044e-04
Loss = 5.8512e-03, PNorm = 146.9653, GNorm = 0.1605, lr_0 = 3.5020e-04
Loss = 5.2174e-03, PNorm = 146.9780, GNorm = 0.0884, lr_0 = 3.4996e-04
Loss = 5.5311e-03, PNorm = 146.9870, GNorm = 0.1870, lr_0 = 3.4972e-04
Loss = 6.8351e-03, PNorm = 146.9973, GNorm = 0.2045, lr_0 = 3.4948e-04
Loss = 6.3671e-03, PNorm = 147.0111, GNorm = 0.1836, lr_0 = 3.4924e-04
Loss = 5.3707e-03, PNorm = 147.0265, GNorm = 0.4652, lr_0 = 3.4900e-04
Loss = 5.5373e-03, PNorm = 147.0416, GNorm = 0.3587, lr_0 = 3.4876e-04
Loss = 5.7892e-03, PNorm = 147.0565, GNorm = 0.1396, lr_0 = 3.4852e-04
Loss = 6.6311e-03, PNorm = 147.0692, GNorm = 0.2292, lr_0 = 3.4828e-04
Loss = 6.8390e-03, PNorm = 147.0836, GNorm = 0.2757, lr_0 = 3.4805e-04
Loss = 5.8928e-03, PNorm = 147.0969, GNorm = 0.2219, lr_0 = 3.4781e-04
Loss = 4.8696e-03, PNorm = 147.1081, GNorm = 0.0886, lr_0 = 3.4757e-04
Loss = 6.1361e-03, PNorm = 147.1211, GNorm = 0.1726, lr_0 = 3.4733e-04
Loss = 9.6979e-03, PNorm = 147.1394, GNorm = 0.3215, lr_0 = 3.4709e-04
Loss = 7.0000e-03, PNorm = 147.1596, GNorm = 0.2618, lr_0 = 3.4686e-04
Loss = 5.3706e-03, PNorm = 147.1724, GNorm = 0.2583, lr_0 = 3.4662e-04
Loss = 5.5349e-03, PNorm = 147.1852, GNorm = 0.2714, lr_0 = 3.4638e-04
Loss = 5.8019e-03, PNorm = 147.2024, GNorm = 0.2350, lr_0 = 3.4614e-04
Loss = 5.1750e-03, PNorm = 147.2179, GNorm = 0.2288, lr_0 = 3.4591e-04
Loss = 5.8417e-03, PNorm = 147.2295, GNorm = 0.3084, lr_0 = 3.4567e-04
Loss = 5.2265e-03, PNorm = 147.2393, GNorm = 0.2026, lr_0 = 3.4543e-04
Loss = 5.6274e-03, PNorm = 147.2513, GNorm = 0.1217, lr_0 = 3.4520e-04
Loss = 6.2960e-03, PNorm = 147.2689, GNorm = 0.2700, lr_0 = 3.4496e-04
Loss = 9.3551e-03, PNorm = 147.2850, GNorm = 0.4619, lr_0 = 3.4472e-04
Loss = 5.5246e-03, PNorm = 147.2999, GNorm = 0.5987, lr_0 = 3.4449e-04
Loss = 6.5403e-03, PNorm = 147.3130, GNorm = 0.1381, lr_0 = 3.4425e-04
Loss = 5.7038e-03, PNorm = 147.3265, GNorm = 0.1109, lr_0 = 3.4402e-04
Loss = 8.8733e-03, PNorm = 147.3398, GNorm = 0.3418, lr_0 = 3.4378e-04
Loss = 7.8311e-03, PNorm = 147.3565, GNorm = 0.2876, lr_0 = 3.4354e-04
Loss = 7.8255e-03, PNorm = 147.3693, GNorm = 0.1903, lr_0 = 3.4331e-04
Validation mae = 0.477244
Epoch 15
Loss = 6.3960e-03, PNorm = 147.3808, GNorm = 0.4134, lr_0 = 3.4307e-04
Loss = 5.8162e-03, PNorm = 147.3954, GNorm = 0.2800, lr_0 = 3.4284e-04
Loss = 7.1007e-03, PNorm = 147.4070, GNorm = 0.3474, lr_0 = 3.4260e-04
Loss = 5.8811e-03, PNorm = 147.4125, GNorm = 0.2682, lr_0 = 3.4237e-04
Loss = 5.7922e-03, PNorm = 147.4180, GNorm = 0.0997, lr_0 = 3.4213e-04
Loss = 5.3068e-03, PNorm = 147.4262, GNorm = 0.0968, lr_0 = 3.4190e-04
Loss = 5.7505e-03, PNorm = 147.4380, GNorm = 0.2664, lr_0 = 3.4167e-04
Loss = 5.0372e-03, PNorm = 147.4523, GNorm = 0.2845, lr_0 = 3.4143e-04
Loss = 5.4572e-03, PNorm = 147.4624, GNorm = 0.1293, lr_0 = 3.4120e-04
Loss = 5.4470e-03, PNorm = 147.4728, GNorm = 0.2742, lr_0 = 3.4096e-04
Loss = 6.1516e-03, PNorm = 147.4842, GNorm = 0.3254, lr_0 = 3.4073e-04
Loss = 7.0764e-03, PNorm = 147.4964, GNorm = 0.1318, lr_0 = 3.4050e-04
Loss = 6.0540e-03, PNorm = 147.5076, GNorm = 0.6292, lr_0 = 3.4026e-04
Loss = 5.1247e-03, PNorm = 147.5210, GNorm = 0.1437, lr_0 = 3.4003e-04
Loss = 5.2643e-03, PNorm = 147.5343, GNorm = 0.2687, lr_0 = 3.3980e-04
Loss = 5.3678e-03, PNorm = 147.5473, GNorm = 0.2259, lr_0 = 3.3956e-04
Loss = 6.2872e-03, PNorm = 147.5596, GNorm = 0.4508, lr_0 = 3.3933e-04
Loss = 5.1526e-03, PNorm = 147.5691, GNorm = 0.3444, lr_0 = 3.3910e-04
Loss = 4.7530e-03, PNorm = 147.5772, GNorm = 0.1240, lr_0 = 3.3887e-04
Loss = 6.1578e-03, PNorm = 147.5863, GNorm = 0.1130, lr_0 = 3.3864e-04
Loss = 4.6362e-03, PNorm = 147.5959, GNorm = 0.0785, lr_0 = 3.3840e-04
Loss = 5.2666e-03, PNorm = 147.6070, GNorm = 0.1242, lr_0 = 3.3817e-04
Loss = 5.0514e-03, PNorm = 147.6162, GNorm = 0.1577, lr_0 = 3.3794e-04
Loss = 5.2040e-03, PNorm = 147.6264, GNorm = 0.4066, lr_0 = 3.3771e-04
Loss = 5.8781e-03, PNorm = 147.6361, GNorm = 0.2472, lr_0 = 3.3748e-04
Loss = 5.1923e-03, PNorm = 147.6477, GNorm = 0.2894, lr_0 = 3.3725e-04
Loss = 5.0957e-03, PNorm = 147.6625, GNorm = 0.3604, lr_0 = 3.3701e-04
Loss = 5.4076e-03, PNorm = 147.6745, GNorm = 0.5137, lr_0 = 3.3678e-04
Loss = 3.9816e-03, PNorm = 147.6828, GNorm = 0.2213, lr_0 = 3.3655e-04
Loss = 4.5893e-03, PNorm = 147.6940, GNorm = 0.1618, lr_0 = 3.3632e-04
Loss = 4.6355e-03, PNorm = 147.7030, GNorm = 0.1894, lr_0 = 3.3609e-04
Loss = 4.3560e-03, PNorm = 147.7169, GNorm = 0.1421, lr_0 = 3.3586e-04
Loss = 7.9613e-03, PNorm = 147.7311, GNorm = 0.1004, lr_0 = 3.3563e-04
Loss = 5.3966e-03, PNorm = 147.7423, GNorm = 0.2180, lr_0 = 3.3540e-04
Loss = 7.3629e-03, PNorm = 147.7524, GNorm = 0.1532, lr_0 = 3.3517e-04
Loss = 4.5118e-03, PNorm = 147.7631, GNorm = 0.2996, lr_0 = 3.3494e-04
Loss = 5.4770e-03, PNorm = 147.7787, GNorm = 0.0782, lr_0 = 3.3471e-04
Loss = 5.0635e-03, PNorm = 147.7895, GNorm = 0.3020, lr_0 = 3.3448e-04
Loss = 4.2357e-03, PNorm = 147.7980, GNorm = 0.1845, lr_0 = 3.3425e-04
Loss = 4.8759e-03, PNorm = 147.8090, GNorm = 0.1473, lr_0 = 3.3403e-04
Loss = 4.4354e-03, PNorm = 147.8187, GNorm = 0.1573, lr_0 = 3.3380e-04
Loss = 5.6827e-03, PNorm = 147.8287, GNorm = 0.1654, lr_0 = 3.3357e-04
Loss = 6.3239e-03, PNorm = 147.8391, GNorm = 0.0970, lr_0 = 3.3334e-04
Loss = 8.2127e-03, PNorm = 147.8505, GNorm = 0.1741, lr_0 = 3.3311e-04
Loss = 5.3548e-03, PNorm = 147.8606, GNorm = 0.2025, lr_0 = 3.3288e-04
Loss = 5.0308e-03, PNorm = 147.8719, GNorm = 0.0985, lr_0 = 3.3265e-04
Loss = 5.7563e-03, PNorm = 147.8842, GNorm = 0.1528, lr_0 = 3.3243e-04
Loss = 4.0187e-03, PNorm = 147.8932, GNorm = 0.2232, lr_0 = 3.3220e-04
Loss = 6.3023e-03, PNorm = 147.9027, GNorm = 0.2397, lr_0 = 3.3197e-04
Loss = 4.5647e-03, PNorm = 147.9107, GNorm = 0.2839, lr_0 = 3.3174e-04
Loss = 5.9275e-03, PNorm = 147.9245, GNorm = 0.2678, lr_0 = 3.3152e-04
Loss = 5.4300e-03, PNorm = 147.9379, GNorm = 0.2282, lr_0 = 3.3129e-04
Loss = 4.9176e-03, PNorm = 147.9499, GNorm = 0.1786, lr_0 = 3.3106e-04
Loss = 5.2854e-03, PNorm = 147.9618, GNorm = 0.1664, lr_0 = 3.3084e-04
Loss = 8.0453e-03, PNorm = 147.9710, GNorm = 0.4668, lr_0 = 3.3061e-04
Loss = 5.1619e-03, PNorm = 147.9848, GNorm = 0.3498, lr_0 = 3.3038e-04
Loss = 7.0844e-03, PNorm = 147.9993, GNorm = 0.2609, lr_0 = 3.3016e-04
Loss = 5.1612e-03, PNorm = 148.0135, GNorm = 0.3086, lr_0 = 3.2993e-04
Loss = 5.1459e-03, PNorm = 148.0268, GNorm = 0.2015, lr_0 = 3.2970e-04
Loss = 4.6565e-03, PNorm = 148.0397, GNorm = 0.3691, lr_0 = 3.2948e-04
Loss = 5.6428e-03, PNorm = 148.0488, GNorm = 0.4605, lr_0 = 3.2925e-04
Loss = 5.0187e-03, PNorm = 148.0606, GNorm = 0.1987, lr_0 = 3.2903e-04
Loss = 5.8499e-03, PNorm = 148.0687, GNorm = 0.1620, lr_0 = 3.2880e-04
Loss = 5.8586e-03, PNorm = 148.0784, GNorm = 0.2514, lr_0 = 3.2858e-04
Loss = 5.2721e-03, PNorm = 148.0897, GNorm = 0.3497, lr_0 = 3.2835e-04
Loss = 4.9087e-03, PNorm = 148.1019, GNorm = 0.1832, lr_0 = 3.2813e-04
Loss = 4.3368e-03, PNorm = 148.1109, GNorm = 0.2869, lr_0 = 3.2790e-04
Loss = 4.6019e-03, PNorm = 148.1216, GNorm = 0.1755, lr_0 = 3.2768e-04
Loss = 4.5746e-03, PNorm = 148.1346, GNorm = 0.1168, lr_0 = 3.2745e-04
Loss = 5.5881e-03, PNorm = 148.1456, GNorm = 0.2096, lr_0 = 3.2723e-04
Loss = 4.5193e-03, PNorm = 148.1599, GNorm = 0.1448, lr_0 = 3.2700e-04
Loss = 4.9564e-03, PNorm = 148.1713, GNorm = 0.2598, lr_0 = 3.2678e-04
Loss = 4.2960e-03, PNorm = 148.1827, GNorm = 0.3198, lr_0 = 3.2656e-04
Loss = 4.6312e-03, PNorm = 148.1933, GNorm = 0.3335, lr_0 = 3.2633e-04
Loss = 5.8587e-03, PNorm = 148.2054, GNorm = 0.1269, lr_0 = 3.2611e-04
Loss = 4.4964e-03, PNorm = 148.2156, GNorm = 0.1476, lr_0 = 3.2589e-04
Loss = 4.4075e-03, PNorm = 148.2287, GNorm = 0.3007, lr_0 = 3.2566e-04
Loss = 4.6815e-03, PNorm = 148.2392, GNorm = 0.1651, lr_0 = 3.2544e-04
Loss = 5.6231e-03, PNorm = 148.2467, GNorm = 0.2667, lr_0 = 3.2522e-04
Loss = 3.9792e-03, PNorm = 148.2540, GNorm = 0.1017, lr_0 = 3.2499e-04
Loss = 5.1234e-03, PNorm = 148.2695, GNorm = 0.1829, lr_0 = 3.2477e-04
Loss = 5.7140e-03, PNorm = 148.2794, GNorm = 0.2798, lr_0 = 3.2455e-04
Loss = 5.1516e-03, PNorm = 148.2905, GNorm = 0.0962, lr_0 = 3.2433e-04
Loss = 5.8744e-03, PNorm = 148.3047, GNorm = 0.0702, lr_0 = 3.2410e-04
Loss = 5.7127e-03, PNorm = 148.3168, GNorm = 0.2916, lr_0 = 3.2388e-04
Loss = 5.2955e-03, PNorm = 148.3326, GNorm = 0.1380, lr_0 = 3.2366e-04
Loss = 5.8502e-03, PNorm = 148.3480, GNorm = 0.0996, lr_0 = 3.2344e-04
Loss = 4.8500e-03, PNorm = 148.3600, GNorm = 0.2393, lr_0 = 3.2322e-04
Loss = 4.3234e-03, PNorm = 148.3732, GNorm = 0.2825, lr_0 = 3.2300e-04
Loss = 5.4971e-03, PNorm = 148.3841, GNorm = 0.2298, lr_0 = 3.2277e-04
Loss = 5.1131e-03, PNorm = 148.3972, GNorm = 0.1335, lr_0 = 3.2255e-04
Loss = 4.8751e-03, PNorm = 148.4064, GNorm = 0.2263, lr_0 = 3.2233e-04
Loss = 5.8122e-03, PNorm = 148.4190, GNorm = 0.1095, lr_0 = 3.2211e-04
Loss = 4.7355e-03, PNorm = 148.4314, GNorm = 0.2362, lr_0 = 3.2189e-04
Loss = 8.6505e-03, PNorm = 148.4420, GNorm = 0.2502, lr_0 = 3.2167e-04
Loss = 6.3846e-03, PNorm = 148.4544, GNorm = 0.4556, lr_0 = 3.2145e-04
Loss = 5.3095e-03, PNorm = 148.4670, GNorm = 0.2607, lr_0 = 3.2123e-04
Loss = 6.3207e-03, PNorm = 148.4780, GNorm = 0.1368, lr_0 = 3.2101e-04
Loss = 5.0801e-03, PNorm = 148.4875, GNorm = 0.0780, lr_0 = 3.2079e-04
Loss = 5.3866e-03, PNorm = 148.4932, GNorm = 0.0943, lr_0 = 3.2057e-04
Loss = 4.2974e-03, PNorm = 148.5013, GNorm = 0.0662, lr_0 = 3.2035e-04
Loss = 6.0128e-03, PNorm = 148.5105, GNorm = 0.2656, lr_0 = 3.2013e-04
Loss = 4.4360e-03, PNorm = 148.5221, GNorm = 0.5572, lr_0 = 3.1991e-04
Loss = 5.2078e-03, PNorm = 148.5359, GNorm = 0.2371, lr_0 = 3.1969e-04
Loss = 7.7527e-03, PNorm = 148.5517, GNorm = 0.1639, lr_0 = 3.1947e-04
Loss = 4.4212e-03, PNorm = 148.5685, GNorm = 0.0781, lr_0 = 3.1925e-04
Loss = 6.2907e-03, PNorm = 148.5820, GNorm = 0.0949, lr_0 = 3.1904e-04
Loss = 4.2880e-03, PNorm = 148.5944, GNorm = 0.0819, lr_0 = 3.1882e-04
Loss = 8.7438e-03, PNorm = 148.6055, GNorm = 0.1235, lr_0 = 3.1860e-04
Loss = 6.0438e-03, PNorm = 148.6169, GNorm = 0.1382, lr_0 = 3.1838e-04
Loss = 4.4079e-03, PNorm = 148.6309, GNorm = 0.1601, lr_0 = 3.1816e-04
Loss = 4.7204e-03, PNorm = 148.6429, GNorm = 0.1206, lr_0 = 3.1794e-04
Loss = 4.8594e-03, PNorm = 148.6552, GNorm = 0.2507, lr_0 = 3.1773e-04
Loss = 6.2169e-03, PNorm = 148.6663, GNorm = 0.0999, lr_0 = 3.1751e-04
Loss = 6.6622e-03, PNorm = 148.6772, GNorm = 0.3678, lr_0 = 3.1729e-04
Loss = 4.9813e-03, PNorm = 148.6910, GNorm = 0.1657, lr_0 = 3.1707e-04
Loss = 5.6721e-03, PNorm = 148.7020, GNorm = 0.2215, lr_0 = 3.1686e-04
Loss = 4.5752e-03, PNorm = 148.7126, GNorm = 0.2830, lr_0 = 3.1664e-04
Loss = 5.4221e-03, PNorm = 148.7240, GNorm = 0.1110, lr_0 = 3.1642e-04
Loss = 5.8266e-03, PNorm = 148.7364, GNorm = 0.1742, lr_0 = 3.1621e-04
Validation mae = 0.476976
Epoch 16
Loss = 4.1417e-03, PNorm = 148.7442, GNorm = 0.2492, lr_0 = 3.1599e-04
Loss = 4.9988e-03, PNorm = 148.7502, GNorm = 0.3400, lr_0 = 3.1577e-04
Loss = 4.3814e-03, PNorm = 148.7559, GNorm = 0.1244, lr_0 = 3.1556e-04
Loss = 3.9332e-03, PNorm = 148.7612, GNorm = 0.0715, lr_0 = 3.1534e-04
Loss = 5.1906e-03, PNorm = 148.7726, GNorm = 0.1393, lr_0 = 3.1512e-04
Loss = 4.4640e-03, PNorm = 148.7809, GNorm = 0.0732, lr_0 = 3.1491e-04
Loss = 4.3779e-03, PNorm = 148.7926, GNorm = 0.1363, lr_0 = 3.1469e-04
Loss = 4.4016e-03, PNorm = 148.8014, GNorm = 0.2653, lr_0 = 3.1448e-04
Loss = 6.2064e-03, PNorm = 148.8092, GNorm = 0.3711, lr_0 = 3.1426e-04
Loss = 4.9303e-03, PNorm = 148.8166, GNorm = 0.1360, lr_0 = 3.1405e-04
Loss = 5.5122e-03, PNorm = 148.8269, GNorm = 0.3782, lr_0 = 3.1383e-04
Loss = 4.2468e-03, PNorm = 148.8371, GNorm = 0.2123, lr_0 = 3.1362e-04
Loss = 3.9790e-03, PNorm = 148.8481, GNorm = 0.2722, lr_0 = 3.1340e-04
Loss = 4.0366e-03, PNorm = 148.8569, GNorm = 0.0770, lr_0 = 3.1319e-04
Loss = 3.7865e-03, PNorm = 148.8671, GNorm = 0.1018, lr_0 = 3.1297e-04
Loss = 4.8450e-03, PNorm = 148.8750, GNorm = 0.3084, lr_0 = 3.1276e-04
Loss = 4.5394e-03, PNorm = 148.8851, GNorm = 0.0561, lr_0 = 3.1254e-04
Loss = 4.3231e-03, PNorm = 148.8916, GNorm = 0.3500, lr_0 = 3.1233e-04
Loss = 4.3090e-03, PNorm = 148.9009, GNorm = 0.1727, lr_0 = 3.1212e-04
Loss = 3.3204e-03, PNorm = 148.9103, GNorm = 0.0953, lr_0 = 3.1190e-04
Loss = 4.3274e-03, PNorm = 148.9185, GNorm = 0.1281, lr_0 = 3.1169e-04
Loss = 6.3503e-03, PNorm = 148.9238, GNorm = 0.2578, lr_0 = 3.1147e-04
Loss = 5.0547e-03, PNorm = 148.9295, GNorm = 0.1432, lr_0 = 3.1126e-04
Loss = 6.0809e-03, PNorm = 148.9383, GNorm = 0.1958, lr_0 = 3.1105e-04
Loss = 7.7520e-03, PNorm = 148.9470, GNorm = 0.2940, lr_0 = 3.1083e-04
Loss = 4.2563e-03, PNorm = 148.9575, GNorm = 0.1342, lr_0 = 3.1062e-04
Loss = 3.7618e-03, PNorm = 148.9664, GNorm = 0.0712, lr_0 = 3.1041e-04
Loss = 4.6517e-03, PNorm = 148.9775, GNorm = 0.1065, lr_0 = 3.1020e-04
Loss = 4.0850e-03, PNorm = 148.9871, GNorm = 0.2325, lr_0 = 3.0998e-04
Loss = 4.2293e-03, PNorm = 148.9981, GNorm = 0.1178, lr_0 = 3.0977e-04
Loss = 4.0684e-03, PNorm = 149.0064, GNorm = 0.2854, lr_0 = 3.0956e-04
Loss = 5.0369e-03, PNorm = 149.0160, GNorm = 0.0673, lr_0 = 3.0935e-04
Loss = 4.4441e-03, PNorm = 149.0253, GNorm = 0.1889, lr_0 = 3.0914e-04
Loss = 6.1306e-03, PNorm = 149.0347, GNorm = 0.2296, lr_0 = 3.0892e-04
Loss = 3.9336e-03, PNorm = 149.0486, GNorm = 0.3279, lr_0 = 3.0871e-04
Loss = 3.8119e-03, PNorm = 149.0594, GNorm = 0.1945, lr_0 = 3.0850e-04
Loss = 3.6882e-03, PNorm = 149.0705, GNorm = 0.1538, lr_0 = 3.0829e-04
Loss = 5.5981e-03, PNorm = 149.0791, GNorm = 0.1969, lr_0 = 3.0808e-04
Loss = 4.8246e-03, PNorm = 149.0883, GNorm = 0.4270, lr_0 = 3.0787e-04
Loss = 3.5528e-03, PNorm = 149.0970, GNorm = 0.2455, lr_0 = 3.0766e-04
Loss = 5.0464e-03, PNorm = 149.1041, GNorm = 0.2195, lr_0 = 3.0745e-04
Loss = 6.4171e-03, PNorm = 149.1115, GNorm = 0.2328, lr_0 = 3.0723e-04
Loss = 4.2816e-03, PNorm = 149.1237, GNorm = 0.1491, lr_0 = 3.0702e-04
Loss = 5.3679e-03, PNorm = 149.1339, GNorm = 0.0812, lr_0 = 3.0681e-04
Loss = 3.9824e-03, PNorm = 149.1459, GNorm = 0.0566, lr_0 = 3.0660e-04
Loss = 4.8250e-03, PNorm = 149.1512, GNorm = 0.3547, lr_0 = 3.0639e-04
Loss = 5.8928e-03, PNorm = 149.1569, GNorm = 0.1690, lr_0 = 3.0618e-04
Loss = 6.9142e-03, PNorm = 149.1679, GNorm = 0.2344, lr_0 = 3.0597e-04
Loss = 3.8717e-03, PNorm = 149.1810, GNorm = 0.1808, lr_0 = 3.0576e-04
Loss = 3.5930e-03, PNorm = 149.1923, GNorm = 0.0998, lr_0 = 3.0555e-04
Loss = 8.9298e-03, PNorm = 149.2028, GNorm = 0.1449, lr_0 = 3.0535e-04
Loss = 6.5317e-03, PNorm = 149.2131, GNorm = 0.4039, lr_0 = 3.0514e-04
Loss = 5.1279e-03, PNorm = 149.2206, GNorm = 0.2143, lr_0 = 3.0493e-04
Loss = 4.1508e-03, PNorm = 149.2293, GNorm = 0.2949, lr_0 = 3.0472e-04
Loss = 5.2534e-03, PNorm = 149.2369, GNorm = 0.1230, lr_0 = 3.0451e-04
Loss = 4.7711e-03, PNorm = 149.2456, GNorm = 0.1137, lr_0 = 3.0430e-04
Loss = 4.9994e-03, PNorm = 149.2559, GNorm = 0.3279, lr_0 = 3.0409e-04
Loss = 3.3633e-03, PNorm = 149.2649, GNorm = 0.0930, lr_0 = 3.0388e-04
Loss = 4.3269e-03, PNorm = 149.2744, GNorm = 0.4588, lr_0 = 3.0368e-04
Loss = 4.4808e-03, PNorm = 149.2810, GNorm = 0.0464, lr_0 = 3.0347e-04
Loss = 4.3514e-03, PNorm = 149.2902, GNorm = 0.1943, lr_0 = 3.0326e-04
Loss = 4.1524e-03, PNorm = 149.3002, GNorm = 0.2099, lr_0 = 3.0305e-04
Loss = 4.7469e-03, PNorm = 149.3107, GNorm = 0.1684, lr_0 = 3.0284e-04
Loss = 5.2549e-03, PNorm = 149.3240, GNorm = 0.3085, lr_0 = 3.0264e-04
Loss = 5.0113e-03, PNorm = 149.3355, GNorm = 0.1946, lr_0 = 3.0243e-04
Loss = 6.0568e-03, PNorm = 149.3466, GNorm = 0.2986, lr_0 = 3.0222e-04
Loss = 4.0205e-03, PNorm = 149.3585, GNorm = 0.0750, lr_0 = 3.0202e-04
Loss = 4.2652e-03, PNorm = 149.3657, GNorm = 0.3012, lr_0 = 3.0181e-04
Loss = 3.7782e-03, PNorm = 149.3742, GNorm = 0.1415, lr_0 = 3.0160e-04
Loss = 4.1161e-03, PNorm = 149.3815, GNorm = 0.1915, lr_0 = 3.0140e-04
Loss = 4.3881e-03, PNorm = 149.3885, GNorm = 0.2397, lr_0 = 3.0119e-04
Loss = 3.9222e-03, PNorm = 149.3995, GNorm = 0.1584, lr_0 = 3.0098e-04
Loss = 5.2510e-03, PNorm = 149.4107, GNorm = 0.0774, lr_0 = 3.0078e-04
Loss = 4.5542e-03, PNorm = 149.4224, GNorm = 0.1297, lr_0 = 3.0057e-04
Loss = 3.6496e-03, PNorm = 149.4314, GNorm = 0.2082, lr_0 = 3.0036e-04
Loss = 4.4443e-03, PNorm = 149.4449, GNorm = 0.1403, lr_0 = 3.0016e-04
Loss = 5.0997e-03, PNorm = 149.4593, GNorm = 0.0824, lr_0 = 2.9995e-04
Loss = 3.8460e-03, PNorm = 149.4680, GNorm = 0.1694, lr_0 = 2.9975e-04
Loss = 4.1477e-03, PNorm = 149.4765, GNorm = 0.1112, lr_0 = 2.9954e-04
Loss = 4.1997e-03, PNorm = 149.4881, GNorm = 0.3038, lr_0 = 2.9934e-04
Loss = 3.8491e-03, PNorm = 149.4970, GNorm = 0.2708, lr_0 = 2.9913e-04
Loss = 4.7352e-03, PNorm = 149.5082, GNorm = 0.2947, lr_0 = 2.9893e-04
Loss = 3.8698e-03, PNorm = 149.5193, GNorm = 0.1761, lr_0 = 2.9872e-04
Loss = 3.4789e-03, PNorm = 149.5288, GNorm = 0.2551, lr_0 = 2.9852e-04
Loss = 4.4631e-03, PNorm = 149.5390, GNorm = 0.3664, lr_0 = 2.9831e-04
Loss = 4.3821e-03, PNorm = 149.5468, GNorm = 0.2178, lr_0 = 2.9811e-04
Loss = 4.1704e-03, PNorm = 149.5553, GNorm = 0.3647, lr_0 = 2.9790e-04
Loss = 3.5740e-03, PNorm = 149.5654, GNorm = 0.1506, lr_0 = 2.9770e-04
Loss = 3.8308e-03, PNorm = 149.5732, GNorm = 0.3408, lr_0 = 2.9750e-04
Loss = 5.3096e-03, PNorm = 149.5808, GNorm = 0.0884, lr_0 = 2.9729e-04
Loss = 4.3750e-03, PNorm = 149.5938, GNorm = 0.2218, lr_0 = 2.9709e-04
Loss = 4.4710e-03, PNorm = 149.6066, GNorm = 0.1158, lr_0 = 2.9689e-04
Loss = 3.1029e-03, PNorm = 149.6146, GNorm = 0.0806, lr_0 = 2.9668e-04
Loss = 3.7217e-03, PNorm = 149.6222, GNorm = 0.1602, lr_0 = 2.9648e-04
Loss = 6.1290e-03, PNorm = 149.6298, GNorm = 0.1555, lr_0 = 2.9628e-04
Loss = 4.0626e-03, PNorm = 149.6396, GNorm = 0.3446, lr_0 = 2.9607e-04
Loss = 4.2152e-03, PNorm = 149.6491, GNorm = 0.2179, lr_0 = 2.9587e-04
Loss = 4.1300e-03, PNorm = 149.6575, GNorm = 0.0982, lr_0 = 2.9567e-04
Loss = 4.9466e-03, PNorm = 149.6638, GNorm = 0.3040, lr_0 = 2.9546e-04
Loss = 4.7347e-03, PNorm = 149.6734, GNorm = 0.1735, lr_0 = 2.9526e-04
Loss = 3.4892e-03, PNorm = 149.6865, GNorm = 0.3287, lr_0 = 2.9506e-04
Loss = 7.4599e-03, PNorm = 149.6973, GNorm = 0.5127, lr_0 = 2.9486e-04
Loss = 4.2924e-03, PNorm = 149.7084, GNorm = 0.3828, lr_0 = 2.9466e-04
Loss = 3.4011e-03, PNorm = 149.7168, GNorm = 0.0732, lr_0 = 2.9445e-04
Loss = 5.1704e-03, PNorm = 149.7263, GNorm = 0.1520, lr_0 = 2.9425e-04
Loss = 3.7546e-03, PNorm = 149.7381, GNorm = 0.2298, lr_0 = 2.9405e-04
Loss = 3.3376e-03, PNorm = 149.7521, GNorm = 0.2139, lr_0 = 2.9385e-04
Loss = 4.2361e-03, PNorm = 149.7638, GNorm = 0.1298, lr_0 = 2.9365e-04
Loss = 5.0951e-03, PNorm = 149.7744, GNorm = 0.2457, lr_0 = 2.9345e-04
Loss = 3.6584e-03, PNorm = 149.7870, GNorm = 0.2174, lr_0 = 2.9325e-04
Loss = 6.3838e-03, PNorm = 149.7980, GNorm = 0.2795, lr_0 = 2.9305e-04
Loss = 6.1817e-03, PNorm = 149.8062, GNorm = 0.2166, lr_0 = 2.9284e-04
Loss = 3.8698e-03, PNorm = 149.8138, GNorm = 0.1072, lr_0 = 2.9264e-04
Loss = 6.6299e-03, PNorm = 149.8259, GNorm = 0.3758, lr_0 = 2.9244e-04
Loss = 5.6978e-03, PNorm = 149.8341, GNorm = 0.1990, lr_0 = 2.9224e-04
Loss = 5.4480e-03, PNorm = 149.8446, GNorm = 0.1656, lr_0 = 2.9204e-04
Loss = 5.4108e-03, PNorm = 149.8579, GNorm = 0.0758, lr_0 = 2.9184e-04
Loss = 3.8548e-03, PNorm = 149.8693, GNorm = 0.4182, lr_0 = 2.9164e-04
Loss = 6.4783e-03, PNorm = 149.8817, GNorm = 0.2944, lr_0 = 2.9144e-04
Loss = 3.7833e-03, PNorm = 149.8942, GNorm = 0.1750, lr_0 = 2.9124e-04
Validation mae = 0.477580
Epoch 17
Loss = 3.5938e-03, PNorm = 149.9021, GNorm = 0.1949, lr_0 = 2.9104e-04
Loss = 4.0399e-03, PNorm = 149.9079, GNorm = 0.0913, lr_0 = 2.9084e-04
Loss = 4.6250e-03, PNorm = 149.9125, GNorm = 0.4770, lr_0 = 2.9065e-04
Loss = 3.4793e-03, PNorm = 149.9199, GNorm = 0.4259, lr_0 = 2.9045e-04
Loss = 4.4405e-03, PNorm = 149.9253, GNorm = 0.0748, lr_0 = 2.9025e-04
Loss = 3.7836e-03, PNorm = 149.9290, GNorm = 0.1579, lr_0 = 2.9005e-04
Loss = 3.6679e-03, PNorm = 149.9370, GNorm = 0.0931, lr_0 = 2.8985e-04
Loss = 3.2456e-03, PNorm = 149.9470, GNorm = 0.2331, lr_0 = 2.8965e-04
Loss = 3.4365e-03, PNorm = 149.9527, GNorm = 0.2141, lr_0 = 2.8945e-04
Loss = 3.8005e-03, PNorm = 149.9595, GNorm = 0.0860, lr_0 = 2.8925e-04
Loss = 2.6586e-03, PNorm = 149.9677, GNorm = 0.1951, lr_0 = 2.8906e-04
Loss = 3.5155e-03, PNorm = 149.9733, GNorm = 0.1712, lr_0 = 2.8886e-04
Loss = 3.9617e-03, PNorm = 149.9785, GNorm = 0.1161, lr_0 = 2.8866e-04
Loss = 3.5733e-03, PNorm = 149.9848, GNorm = 0.2306, lr_0 = 2.8846e-04
Loss = 3.4890e-03, PNorm = 149.9909, GNorm = 0.0751, lr_0 = 2.8826e-04
Loss = 4.6840e-03, PNorm = 150.0006, GNorm = 0.2333, lr_0 = 2.8807e-04
Loss = 4.3173e-03, PNorm = 150.0101, GNorm = 0.1658, lr_0 = 2.8787e-04
Loss = 3.9655e-03, PNorm = 150.0157, GNorm = 0.1501, lr_0 = 2.8767e-04
Loss = 3.2065e-03, PNorm = 150.0220, GNorm = 0.1227, lr_0 = 2.8748e-04
Loss = 3.7304e-03, PNorm = 150.0322, GNorm = 0.0658, lr_0 = 2.8728e-04
Loss = 3.6987e-03, PNorm = 150.0390, GNorm = 0.0833, lr_0 = 2.8708e-04
Loss = 5.5173e-03, PNorm = 150.0455, GNorm = 0.2949, lr_0 = 2.8689e-04
Loss = 5.7693e-03, PNorm = 150.0533, GNorm = 0.1522, lr_0 = 2.8669e-04
Loss = 3.8320e-03, PNorm = 150.0617, GNorm = 0.1362, lr_0 = 2.8649e-04
Loss = 2.8720e-03, PNorm = 150.0714, GNorm = 0.1939, lr_0 = 2.8630e-04
Loss = 4.1654e-03, PNorm = 150.0781, GNorm = 0.1463, lr_0 = 2.8610e-04
Loss = 4.5221e-03, PNorm = 150.0826, GNorm = 0.0545, lr_0 = 2.8590e-04
Loss = 3.9478e-03, PNorm = 150.0885, GNorm = 0.2398, lr_0 = 2.8571e-04
Loss = 5.4483e-03, PNorm = 150.0942, GNorm = 0.2196, lr_0 = 2.8551e-04
Loss = 5.7651e-03, PNorm = 150.1011, GNorm = 0.0884, lr_0 = 2.8532e-04
Loss = 3.2162e-03, PNorm = 150.1110, GNorm = 0.2473, lr_0 = 2.8512e-04
Loss = 3.1963e-03, PNorm = 150.1203, GNorm = 0.1612, lr_0 = 2.8493e-04
Loss = 3.0304e-03, PNorm = 150.1314, GNorm = 0.1299, lr_0 = 2.8473e-04
Loss = 2.7940e-03, PNorm = 150.1410, GNorm = 0.1538, lr_0 = 2.8454e-04
Loss = 3.5644e-03, PNorm = 150.1486, GNorm = 0.1198, lr_0 = 2.8434e-04
Loss = 4.0600e-03, PNorm = 150.1574, GNorm = 0.2071, lr_0 = 2.8415e-04
Loss = 3.4349e-03, PNorm = 150.1640, GNorm = 0.0569, lr_0 = 2.8395e-04
Loss = 3.2613e-03, PNorm = 150.1709, GNorm = 0.1397, lr_0 = 2.8376e-04
Loss = 3.7246e-03, PNorm = 150.1776, GNorm = 0.2207, lr_0 = 2.8356e-04
Loss = 4.6950e-03, PNorm = 150.1843, GNorm = 0.4485, lr_0 = 2.8337e-04
Loss = 4.0177e-03, PNorm = 150.1948, GNorm = 0.2480, lr_0 = 2.8317e-04
Loss = 3.3946e-03, PNorm = 150.2023, GNorm = 0.2737, lr_0 = 2.8298e-04
Loss = 6.6350e-03, PNorm = 150.2103, GNorm = 0.1844, lr_0 = 2.8279e-04
Loss = 3.2041e-03, PNorm = 150.2177, GNorm = 0.1093, lr_0 = 2.8259e-04
Loss = 3.9514e-03, PNorm = 150.2230, GNorm = 0.2257, lr_0 = 2.8240e-04
Loss = 3.3166e-03, PNorm = 150.2307, GNorm = 0.1501, lr_0 = 2.8221e-04
Loss = 3.1145e-03, PNorm = 150.2374, GNorm = 0.2440, lr_0 = 2.8201e-04
Loss = 3.5501e-03, PNorm = 150.2456, GNorm = 0.0856, lr_0 = 2.8182e-04
Loss = 6.2671e-03, PNorm = 150.2560, GNorm = 0.2376, lr_0 = 2.8163e-04
Loss = 3.1443e-03, PNorm = 150.2654, GNorm = 0.2778, lr_0 = 2.8143e-04
Loss = 2.7255e-03, PNorm = 150.2716, GNorm = 0.2459, lr_0 = 2.8124e-04
Loss = 4.7605e-03, PNorm = 150.2779, GNorm = 0.1503, lr_0 = 2.8105e-04
Loss = 4.4546e-03, PNorm = 150.2898, GNorm = 0.1537, lr_0 = 2.8085e-04
Loss = 4.1822e-03, PNorm = 150.3001, GNorm = 0.1545, lr_0 = 2.8066e-04
Loss = 3.3556e-03, PNorm = 150.3071, GNorm = 0.0938, lr_0 = 2.8047e-04
Loss = 4.2870e-03, PNorm = 150.3133, GNorm = 0.2475, lr_0 = 2.8028e-04
Loss = 3.6487e-03, PNorm = 150.3220, GNorm = 0.0694, lr_0 = 2.8009e-04
Loss = 7.0327e-03, PNorm = 150.3290, GNorm = 0.2086, lr_0 = 2.7989e-04
Loss = 5.0506e-03, PNorm = 150.3363, GNorm = 0.1085, lr_0 = 2.7970e-04
Loss = 3.3517e-03, PNorm = 150.3452, GNorm = 0.4114, lr_0 = 2.7951e-04
Loss = 7.2950e-03, PNorm = 150.3530, GNorm = 0.1198, lr_0 = 2.7932e-04
Loss = 3.6847e-03, PNorm = 150.3631, GNorm = 0.2816, lr_0 = 2.7913e-04
Loss = 3.1798e-03, PNorm = 150.3689, GNorm = 0.0981, lr_0 = 2.7894e-04
Loss = 4.0150e-03, PNorm = 150.3771, GNorm = 0.2545, lr_0 = 2.7875e-04
Loss = 4.4950e-03, PNorm = 150.3844, GNorm = 0.1246, lr_0 = 2.7855e-04
Loss = 2.9602e-03, PNorm = 150.3913, GNorm = 0.1172, lr_0 = 2.7836e-04
Loss = 5.4316e-03, PNorm = 150.3984, GNorm = 0.2309, lr_0 = 2.7817e-04
Loss = 3.0382e-03, PNorm = 150.4079, GNorm = 0.2731, lr_0 = 2.7798e-04
Loss = 3.1145e-03, PNorm = 150.4167, GNorm = 0.1411, lr_0 = 2.7779e-04
Loss = 3.2048e-03, PNorm = 150.4287, GNorm = 0.0501, lr_0 = 2.7760e-04
Loss = 3.1938e-03, PNorm = 150.4371, GNorm = 0.2059, lr_0 = 2.7741e-04
Loss = 4.0833e-03, PNorm = 150.4435, GNorm = 0.1094, lr_0 = 2.7722e-04
Loss = 2.6992e-03, PNorm = 150.4513, GNorm = 0.1349, lr_0 = 2.7703e-04
Loss = 4.9750e-03, PNorm = 150.4599, GNorm = 0.2752, lr_0 = 2.7684e-04
Loss = 3.1214e-03, PNorm = 150.4682, GNorm = 0.0430, lr_0 = 2.7665e-04
Loss = 4.8109e-03, PNorm = 150.4756, GNorm = 0.2964, lr_0 = 2.7646e-04
Loss = 3.5210e-03, PNorm = 150.4863, GNorm = 0.1678, lr_0 = 2.7627e-04
Loss = 6.6169e-03, PNorm = 150.4925, GNorm = 0.1611, lr_0 = 2.7608e-04
Loss = 4.4114e-03, PNorm = 150.4994, GNorm = 0.2022, lr_0 = 2.7590e-04
Loss = 4.6137e-03, PNorm = 150.5054, GNorm = 0.2307, lr_0 = 2.7571e-04
Loss = 3.3372e-03, PNorm = 150.5153, GNorm = 0.2223, lr_0 = 2.7552e-04
Loss = 3.1647e-03, PNorm = 150.5227, GNorm = 0.0609, lr_0 = 2.7533e-04
Loss = 3.6727e-03, PNorm = 150.5311, GNorm = 0.0910, lr_0 = 2.7514e-04
Loss = 3.6867e-03, PNorm = 150.5424, GNorm = 0.1090, lr_0 = 2.7495e-04
Loss = 4.5405e-03, PNorm = 150.5541, GNorm = 0.1007, lr_0 = 2.7476e-04
Loss = 3.4873e-03, PNorm = 150.5639, GNorm = 0.1788, lr_0 = 2.7457e-04
Loss = 3.8466e-03, PNorm = 150.5719, GNorm = 0.3342, lr_0 = 2.7439e-04
Loss = 3.5922e-03, PNorm = 150.5815, GNorm = 0.2030, lr_0 = 2.7420e-04
Loss = 3.1794e-03, PNorm = 150.5862, GNorm = 0.1970, lr_0 = 2.7401e-04
Loss = 3.5094e-03, PNorm = 150.5947, GNorm = 0.1374, lr_0 = 2.7382e-04
Loss = 4.4971e-03, PNorm = 150.6056, GNorm = 0.1463, lr_0 = 2.7364e-04
Loss = 3.2300e-03, PNorm = 150.6139, GNorm = 0.1005, lr_0 = 2.7345e-04
Loss = 4.5705e-03, PNorm = 150.6226, GNorm = 0.2532, lr_0 = 2.7326e-04
Loss = 2.9790e-03, PNorm = 150.6288, GNorm = 0.1038, lr_0 = 2.7307e-04
Loss = 4.7914e-03, PNorm = 150.6320, GNorm = 0.4155, lr_0 = 2.7289e-04
Loss = 3.6462e-03, PNorm = 150.6406, GNorm = 0.1755, lr_0 = 2.7270e-04
Loss = 3.2459e-03, PNorm = 150.6506, GNorm = 0.1229, lr_0 = 2.7251e-04
Loss = 3.3659e-03, PNorm = 150.6593, GNorm = 0.1161, lr_0 = 2.7233e-04
Loss = 3.2238e-03, PNorm = 150.6669, GNorm = 0.0952, lr_0 = 2.7214e-04
Loss = 4.0953e-03, PNorm = 150.6774, GNorm = 0.1103, lr_0 = 2.7195e-04
Loss = 3.2703e-03, PNorm = 150.6852, GNorm = 0.3940, lr_0 = 2.7177e-04
Loss = 6.8715e-03, PNorm = 150.6942, GNorm = 0.1013, lr_0 = 2.7158e-04
Loss = 4.1832e-03, PNorm = 150.7012, GNorm = 0.3031, lr_0 = 2.7139e-04
Loss = 3.7780e-03, PNorm = 150.7132, GNorm = 0.3978, lr_0 = 2.7121e-04
Loss = 4.0991e-03, PNorm = 150.7229, GNorm = 0.1596, lr_0 = 2.7102e-04
Loss = 4.5249e-03, PNorm = 150.7297, GNorm = 0.1871, lr_0 = 2.7084e-04
Loss = 5.3229e-03, PNorm = 150.7366, GNorm = 0.2474, lr_0 = 2.7065e-04
Loss = 3.1551e-03, PNorm = 150.7455, GNorm = 0.1593, lr_0 = 2.7047e-04
Loss = 3.4414e-03, PNorm = 150.7541, GNorm = 0.1769, lr_0 = 2.7028e-04
Loss = 4.6426e-03, PNorm = 150.7621, GNorm = 0.2048, lr_0 = 2.7010e-04
Loss = 3.5992e-03, PNorm = 150.7715, GNorm = 0.1861, lr_0 = 2.6991e-04
Loss = 5.2478e-03, PNorm = 150.7784, GNorm = 0.1393, lr_0 = 2.6973e-04
Loss = 3.3174e-03, PNorm = 150.7856, GNorm = 0.1642, lr_0 = 2.6954e-04
Loss = 3.5297e-03, PNorm = 150.7949, GNorm = 0.1506, lr_0 = 2.6936e-04
Loss = 4.1259e-03, PNorm = 150.8059, GNorm = 0.1167, lr_0 = 2.6917e-04
Loss = 3.2549e-03, PNorm = 150.8151, GNorm = 0.2429, lr_0 = 2.6899e-04
Loss = 3.8107e-03, PNorm = 150.8243, GNorm = 0.3202, lr_0 = 2.6880e-04
Loss = 2.8834e-03, PNorm = 150.8306, GNorm = 0.1121, lr_0 = 2.6862e-04
Loss = 3.3009e-03, PNorm = 150.8366, GNorm = 0.1305, lr_0 = 2.6844e-04
Loss = 4.1869e-03, PNorm = 150.8453, GNorm = 0.2212, lr_0 = 2.6825e-04
Validation mae = 0.477324
Epoch 18
Loss = 3.4312e-03, PNorm = 150.8511, GNorm = 0.0751, lr_0 = 2.6807e-04
Loss = 3.5435e-03, PNorm = 150.8562, GNorm = 0.2475, lr_0 = 2.6788e-04
Loss = 4.5080e-03, PNorm = 150.8599, GNorm = 0.2685, lr_0 = 2.6770e-04
Loss = 3.4882e-03, PNorm = 150.8645, GNorm = 0.1668, lr_0 = 2.6752e-04
Loss = 3.0592e-03, PNorm = 150.8698, GNorm = 0.0591, lr_0 = 2.6733e-04
Loss = 2.8327e-03, PNorm = 150.8755, GNorm = 0.0714, lr_0 = 2.6715e-04
Loss = 3.8602e-03, PNorm = 150.8808, GNorm = 0.1772, lr_0 = 2.6697e-04
Loss = 2.4659e-03, PNorm = 150.8873, GNorm = 0.0865, lr_0 = 2.6678e-04
Loss = 3.7974e-03, PNorm = 150.8944, GNorm = 0.1297, lr_0 = 2.6660e-04
Loss = 3.5215e-03, PNorm = 150.9012, GNorm = 0.1693, lr_0 = 2.6642e-04
Loss = 4.2971e-03, PNorm = 150.9072, GNorm = 0.4815, lr_0 = 2.6624e-04
Loss = 4.1908e-03, PNorm = 150.9137, GNorm = 0.1429, lr_0 = 2.6605e-04
Loss = 2.9645e-03, PNorm = 150.9202, GNorm = 0.2761, lr_0 = 2.6587e-04
Loss = 2.7992e-03, PNorm = 150.9254, GNorm = 0.2090, lr_0 = 2.6569e-04
Loss = 2.9443e-03, PNorm = 150.9304, GNorm = 0.2052, lr_0 = 2.6551e-04
Loss = 2.6604e-03, PNorm = 150.9398, GNorm = 0.2522, lr_0 = 2.6533e-04
Loss = 3.4047e-03, PNorm = 150.9489, GNorm = 0.1431, lr_0 = 2.6514e-04
Loss = 6.1376e-03, PNorm = 150.9540, GNorm = 0.1572, lr_0 = 2.6496e-04
Loss = 2.2968e-03, PNorm = 150.9578, GNorm = 0.2386, lr_0 = 2.6478e-04
Loss = 3.5395e-03, PNorm = 150.9641, GNorm = 0.0671, lr_0 = 2.6460e-04
Loss = 2.9900e-03, PNorm = 150.9698, GNorm = 0.1764, lr_0 = 2.6442e-04
Loss = 3.0793e-03, PNorm = 150.9738, GNorm = 0.2025, lr_0 = 2.6424e-04
Loss = 3.4974e-03, PNorm = 150.9790, GNorm = 0.1822, lr_0 = 2.6406e-04
Loss = 3.0419e-03, PNorm = 150.9865, GNorm = 0.3203, lr_0 = 2.6388e-04
Loss = 5.2027e-03, PNorm = 150.9934, GNorm = 0.2388, lr_0 = 2.6369e-04
Loss = 2.6841e-03, PNorm = 151.0014, GNorm = 0.0627, lr_0 = 2.6351e-04
Loss = 3.4816e-03, PNorm = 151.0108, GNorm = 0.1595, lr_0 = 2.6333e-04
Loss = 2.6821e-03, PNorm = 151.0165, GNorm = 0.1927, lr_0 = 2.6315e-04
Loss = 4.0848e-03, PNorm = 151.0208, GNorm = 0.2761, lr_0 = 2.6297e-04
Loss = 2.9357e-03, PNorm = 151.0276, GNorm = 0.1086, lr_0 = 2.6279e-04
Loss = 3.0992e-03, PNorm = 151.0356, GNorm = 0.1639, lr_0 = 2.6261e-04
Loss = 2.7119e-03, PNorm = 151.0427, GNorm = 0.1486, lr_0 = 2.6243e-04
Loss = 3.8179e-03, PNorm = 151.0486, GNorm = 0.2189, lr_0 = 2.6225e-04
Loss = 4.0206e-03, PNorm = 151.0564, GNorm = 0.2611, lr_0 = 2.6207e-04
Loss = 2.4474e-03, PNorm = 151.0649, GNorm = 0.2788, lr_0 = 2.6189e-04
Loss = 2.3608e-03, PNorm = 151.0733, GNorm = 0.1761, lr_0 = 2.6171e-04
Loss = 2.3373e-03, PNorm = 151.0811, GNorm = 0.1833, lr_0 = 2.6153e-04
Loss = 2.8742e-03, PNorm = 151.0857, GNorm = 0.0934, lr_0 = 2.6136e-04
Loss = 3.0808e-03, PNorm = 151.0906, GNorm = 0.3194, lr_0 = 2.6118e-04
Loss = 3.8193e-03, PNorm = 151.0976, GNorm = 0.1325, lr_0 = 2.6100e-04
Loss = 3.6511e-03, PNorm = 151.1026, GNorm = 0.1718, lr_0 = 2.6082e-04
Loss = 3.1834e-03, PNorm = 151.1102, GNorm = 0.0932, lr_0 = 2.6064e-04
Loss = 3.2066e-03, PNorm = 151.1177, GNorm = 0.5480, lr_0 = 2.6046e-04
Loss = 4.4050e-03, PNorm = 151.1260, GNorm = 0.1795, lr_0 = 2.6028e-04
Loss = 4.0231e-03, PNorm = 151.1341, GNorm = 0.1794, lr_0 = 2.6011e-04
Loss = 2.7189e-03, PNorm = 151.1407, GNorm = 0.0586, lr_0 = 2.5993e-04
Loss = 4.9619e-03, PNorm = 151.1477, GNorm = 0.2577, lr_0 = 2.5975e-04
Loss = 3.4947e-03, PNorm = 151.1566, GNorm = 0.1129, lr_0 = 2.5957e-04
Loss = 3.2218e-03, PNorm = 151.1625, GNorm = 0.1598, lr_0 = 2.5939e-04
Loss = 2.7196e-03, PNorm = 151.1692, GNorm = 0.1787, lr_0 = 2.5922e-04
Loss = 2.7186e-03, PNorm = 151.1770, GNorm = 0.1711, lr_0 = 2.5904e-04
Loss = 4.6931e-03, PNorm = 151.1835, GNorm = 0.0961, lr_0 = 2.5886e-04
Loss = 3.1590e-03, PNorm = 151.1913, GNorm = 0.1228, lr_0 = 2.5868e-04
Loss = 3.6483e-03, PNorm = 151.1965, GNorm = 0.3979, lr_0 = 2.5851e-04
Loss = 5.9197e-03, PNorm = 151.1990, GNorm = 0.2623, lr_0 = 2.5833e-04
Loss = 2.7662e-03, PNorm = 151.2064, GNorm = 0.1082, lr_0 = 2.5815e-04
Loss = 2.6465e-03, PNorm = 151.2134, GNorm = 0.1957, lr_0 = 2.5797e-04
Loss = 2.8933e-03, PNorm = 151.2212, GNorm = 0.1686, lr_0 = 2.5780e-04
Loss = 3.6309e-03, PNorm = 151.2296, GNorm = 0.1691, lr_0 = 2.5762e-04
Loss = 2.6106e-03, PNorm = 151.2368, GNorm = 0.1632, lr_0 = 2.5745e-04
Loss = 3.1983e-03, PNorm = 151.2428, GNorm = 0.2584, lr_0 = 2.5727e-04
Loss = 2.7891e-03, PNorm = 151.2476, GNorm = 0.1155, lr_0 = 2.5709e-04
Loss = 3.5455e-03, PNorm = 151.2591, GNorm = 0.0586, lr_0 = 2.5692e-04
Loss = 3.3826e-03, PNorm = 151.2688, GNorm = 0.3160, lr_0 = 2.5674e-04
Loss = 3.6972e-03, PNorm = 151.2792, GNorm = 0.2493, lr_0 = 2.5656e-04
Loss = 4.0531e-03, PNorm = 151.2856, GNorm = 0.0613, lr_0 = 2.5639e-04
Loss = 3.2369e-03, PNorm = 151.2916, GNorm = 0.1103, lr_0 = 2.5621e-04
Loss = 2.5840e-03, PNorm = 151.3010, GNorm = 0.0736, lr_0 = 2.5604e-04
Loss = 5.7747e-03, PNorm = 151.3056, GNorm = 0.1687, lr_0 = 2.5586e-04
Loss = 2.7965e-03, PNorm = 151.3157, GNorm = 0.0659, lr_0 = 2.5569e-04
Loss = 2.2876e-03, PNorm = 151.3211, GNorm = 0.2502, lr_0 = 2.5551e-04
Loss = 3.6420e-03, PNorm = 151.3257, GNorm = 0.2291, lr_0 = 2.5534e-04
Loss = 3.1596e-03, PNorm = 151.3314, GNorm = 0.2541, lr_0 = 2.5516e-04
Loss = 2.8727e-03, PNorm = 151.3390, GNorm = 0.1421, lr_0 = 2.5499e-04
Loss = 3.3893e-03, PNorm = 151.3457, GNorm = 0.0700, lr_0 = 2.5481e-04
Loss = 3.1141e-03, PNorm = 151.3517, GNorm = 0.0884, lr_0 = 2.5464e-04
Loss = 3.4103e-03, PNorm = 151.3590, GNorm = 0.1253, lr_0 = 2.5446e-04
Loss = 2.7622e-03, PNorm = 151.3644, GNorm = 0.0496, lr_0 = 2.5429e-04
Loss = 4.0539e-03, PNorm = 151.3685, GNorm = 0.1293, lr_0 = 2.5411e-04
Loss = 5.3306e-03, PNorm = 151.3731, GNorm = 0.2057, lr_0 = 2.5394e-04
Loss = 2.2501e-03, PNorm = 151.3783, GNorm = 0.1852, lr_0 = 2.5377e-04
Loss = 6.7389e-03, PNorm = 151.3848, GNorm = 0.1132, lr_0 = 2.5359e-04
Loss = 4.0184e-03, PNorm = 151.3920, GNorm = 0.2269, lr_0 = 2.5342e-04
Loss = 4.5885e-03, PNorm = 151.3988, GNorm = 0.1236, lr_0 = 2.5325e-04
Loss = 2.8035e-03, PNorm = 151.4051, GNorm = 0.1490, lr_0 = 2.5307e-04
Loss = 3.1776e-03, PNorm = 151.4103, GNorm = 0.3633, lr_0 = 2.5290e-04
Loss = 3.0258e-03, PNorm = 151.4157, GNorm = 0.1665, lr_0 = 2.5273e-04
Loss = 4.0730e-03, PNorm = 151.4235, GNorm = 0.2614, lr_0 = 2.5255e-04
Loss = 2.4081e-03, PNorm = 151.4308, GNorm = 0.1056, lr_0 = 2.5238e-04
Loss = 3.0993e-03, PNorm = 151.4389, GNorm = 0.0999, lr_0 = 2.5221e-04
Loss = 3.4013e-03, PNorm = 151.4448, GNorm = 0.0619, lr_0 = 2.5203e-04
Loss = 3.7916e-03, PNorm = 151.4516, GNorm = 0.1404, lr_0 = 2.5186e-04
Loss = 3.6700e-03, PNorm = 151.4643, GNorm = 0.0721, lr_0 = 2.5169e-04
Loss = 4.5785e-03, PNorm = 151.4754, GNorm = 0.3007, lr_0 = 2.5152e-04
Loss = 2.7491e-03, PNorm = 151.4821, GNorm = 0.2711, lr_0 = 2.5134e-04
Loss = 3.8395e-03, PNorm = 151.4885, GNorm = 0.0894, lr_0 = 2.5117e-04
Loss = 2.6038e-03, PNorm = 151.4958, GNorm = 0.2621, lr_0 = 2.5100e-04
Loss = 3.0541e-03, PNorm = 151.4996, GNorm = 0.1926, lr_0 = 2.5083e-04
Loss = 3.5130e-03, PNorm = 151.5035, GNorm = 0.1244, lr_0 = 2.5066e-04
Loss = 2.6716e-03, PNorm = 151.5079, GNorm = 0.0758, lr_0 = 2.5048e-04
Loss = 3.3427e-03, PNorm = 151.5127, GNorm = 0.1706, lr_0 = 2.5031e-04
Loss = 2.7518e-03, PNorm = 151.5182, GNorm = 0.1190, lr_0 = 2.5014e-04
Loss = 3.0409e-03, PNorm = 151.5237, GNorm = 0.0538, lr_0 = 2.4997e-04
Loss = 3.1864e-03, PNorm = 151.5316, GNorm = 0.1303, lr_0 = 2.4980e-04
Loss = 6.1187e-03, PNorm = 151.5414, GNorm = 0.2721, lr_0 = 2.4963e-04
Loss = 5.0331e-03, PNorm = 151.5494, GNorm = 0.2537, lr_0 = 2.4946e-04
Loss = 2.8424e-03, PNorm = 151.5563, GNorm = 0.0886, lr_0 = 2.4929e-04
Loss = 5.0967e-03, PNorm = 151.5639, GNorm = 0.6607, lr_0 = 2.4911e-04
Loss = 3.3268e-03, PNorm = 151.5727, GNorm = 0.0693, lr_0 = 2.4894e-04
Loss = 3.7388e-03, PNorm = 151.5807, GNorm = 0.1212, lr_0 = 2.4877e-04
Loss = 2.6050e-03, PNorm = 151.5889, GNorm = 0.0885, lr_0 = 2.4860e-04
Loss = 2.6082e-03, PNorm = 151.5970, GNorm = 0.1596, lr_0 = 2.4843e-04
Loss = 2.4484e-03, PNorm = 151.6044, GNorm = 0.0974, lr_0 = 2.4826e-04
Loss = 3.5749e-03, PNorm = 151.6107, GNorm = 0.0764, lr_0 = 2.4809e-04
Loss = 2.5262e-03, PNorm = 151.6189, GNorm = 0.3596, lr_0 = 2.4792e-04
Loss = 6.0950e-03, PNorm = 151.6231, GNorm = 0.1481, lr_0 = 2.4775e-04
Loss = 3.5739e-03, PNorm = 151.6260, GNorm = 0.3455, lr_0 = 2.4758e-04
Loss = 3.1077e-03, PNorm = 151.6289, GNorm = 0.0679, lr_0 = 2.4741e-04
Loss = 2.6326e-03, PNorm = 151.6340, GNorm = 0.1408, lr_0 = 2.4724e-04
Loss = 2.7094e-03, PNorm = 151.6421, GNorm = 0.1725, lr_0 = 2.4707e-04
Validation mae = 0.477127
Epoch 19
Loss = 3.5329e-03, PNorm = 151.6512, GNorm = 0.1676, lr_0 = 2.4690e-04
Loss = 3.0358e-03, PNorm = 151.6562, GNorm = 0.2704, lr_0 = 2.4674e-04
Loss = 2.3011e-03, PNorm = 151.6589, GNorm = 0.0814, lr_0 = 2.4657e-04
Loss = 2.6148e-03, PNorm = 151.6621, GNorm = 0.0504, lr_0 = 2.4640e-04
Loss = 3.3323e-03, PNorm = 151.6665, GNorm = 0.3984, lr_0 = 2.4623e-04
Loss = 2.4729e-03, PNorm = 151.6696, GNorm = 0.1735, lr_0 = 2.4606e-04
Loss = 2.6546e-03, PNorm = 151.6791, GNorm = 0.1458, lr_0 = 2.4589e-04
Loss = 2.3107e-03, PNorm = 151.6881, GNorm = 0.0894, lr_0 = 2.4572e-04
Loss = 2.6880e-03, PNorm = 151.6934, GNorm = 0.2594, lr_0 = 2.4556e-04
Loss = 2.2474e-03, PNorm = 151.6966, GNorm = 0.1555, lr_0 = 2.4539e-04
Loss = 2.5405e-03, PNorm = 151.7028, GNorm = 0.3340, lr_0 = 2.4522e-04
Loss = 2.7838e-03, PNorm = 151.7108, GNorm = 0.2384, lr_0 = 2.4505e-04
Loss = 4.0281e-03, PNorm = 151.7171, GNorm = 0.1738, lr_0 = 2.4488e-04
Loss = 2.8050e-03, PNorm = 151.7240, GNorm = 0.1230, lr_0 = 2.4472e-04
Loss = 2.3340e-03, PNorm = 151.7296, GNorm = 0.2673, lr_0 = 2.4455e-04
Loss = 2.4228e-03, PNorm = 151.7382, GNorm = 0.0557, lr_0 = 2.4438e-04
Loss = 3.9464e-03, PNorm = 151.7456, GNorm = 0.1561, lr_0 = 2.4421e-04
Loss = 2.1000e-03, PNorm = 151.7506, GNorm = 0.0478, lr_0 = 2.4405e-04
Loss = 2.7423e-03, PNorm = 151.7531, GNorm = 0.0645, lr_0 = 2.4388e-04
Loss = 4.0683e-03, PNorm = 151.7566, GNorm = 0.1708, lr_0 = 2.4371e-04
Loss = 3.0146e-03, PNorm = 151.7628, GNorm = 0.0510, lr_0 = 2.4354e-04
Loss = 2.2697e-03, PNorm = 151.7664, GNorm = 0.1166, lr_0 = 2.4338e-04
Loss = 3.6597e-03, PNorm = 151.7658, GNorm = 0.3011, lr_0 = 2.4321e-04
Loss = 3.0975e-03, PNorm = 151.7702, GNorm = 0.3179, lr_0 = 2.4304e-04
Loss = 6.5124e-03, PNorm = 151.7759, GNorm = 0.1490, lr_0 = 2.4288e-04
Loss = 2.7699e-03, PNorm = 151.7828, GNorm = 0.1712, lr_0 = 2.4271e-04
Loss = 2.1984e-03, PNorm = 151.7856, GNorm = 0.1540, lr_0 = 2.4254e-04
Loss = 3.0325e-03, PNorm = 151.7894, GNorm = 0.0769, lr_0 = 2.4238e-04
Loss = 4.3498e-03, PNorm = 151.7958, GNorm = 0.0964, lr_0 = 2.4221e-04
Loss = 4.3280e-03, PNorm = 151.7989, GNorm = 0.2668, lr_0 = 2.4205e-04
Loss = 2.7251e-03, PNorm = 151.8045, GNorm = 0.2486, lr_0 = 2.4188e-04
Loss = 4.2592e-03, PNorm = 151.8097, GNorm = 0.3502, lr_0 = 2.4171e-04
Loss = 2.3394e-03, PNorm = 151.8159, GNorm = 0.2285, lr_0 = 2.4155e-04
Loss = 2.3902e-03, PNorm = 151.8221, GNorm = 0.1925, lr_0 = 2.4138e-04
Loss = 2.9857e-03, PNorm = 151.8297, GNorm = 0.0785, lr_0 = 2.4122e-04
Loss = 2.2708e-03, PNorm = 151.8358, GNorm = 0.1910, lr_0 = 2.4105e-04
Loss = 3.6548e-03, PNorm = 151.8427, GNorm = 0.1349, lr_0 = 2.4089e-04
Loss = 2.1906e-03, PNorm = 151.8479, GNorm = 0.0611, lr_0 = 2.4072e-04
Loss = 2.6256e-03, PNorm = 151.8545, GNorm = 0.3434, lr_0 = 2.4056e-04
Loss = 2.3836e-03, PNorm = 151.8575, GNorm = 0.2293, lr_0 = 2.4039e-04
Loss = 2.9075e-03, PNorm = 151.8642, GNorm = 0.3706, lr_0 = 2.4023e-04
Loss = 2.4486e-03, PNorm = 151.8690, GNorm = 0.1193, lr_0 = 2.4006e-04
Loss = 2.3257e-03, PNorm = 151.8729, GNorm = 0.0680, lr_0 = 2.3990e-04
Loss = 3.5247e-03, PNorm = 151.8785, GNorm = 0.1512, lr_0 = 2.3974e-04
Loss = 2.5051e-03, PNorm = 151.8841, GNorm = 0.1365, lr_0 = 2.3957e-04
Loss = 2.6436e-03, PNorm = 151.8932, GNorm = 0.2612, lr_0 = 2.3941e-04
Loss = 2.7818e-03, PNorm = 151.9027, GNorm = 0.1178, lr_0 = 2.3924e-04
Loss = 2.2671e-03, PNorm = 151.9071, GNorm = 0.1119, lr_0 = 2.3908e-04
Loss = 2.7962e-03, PNorm = 151.9125, GNorm = 0.1027, lr_0 = 2.3892e-04
Loss = 2.2395e-03, PNorm = 151.9137, GNorm = 0.1003, lr_0 = 2.3875e-04
Loss = 2.5879e-03, PNorm = 151.9189, GNorm = 0.1232, lr_0 = 2.3859e-04
Loss = 2.6285e-03, PNorm = 151.9230, GNorm = 0.1284, lr_0 = 2.3842e-04
Loss = 2.4447e-03, PNorm = 151.9291, GNorm = 0.2375, lr_0 = 2.3826e-04
Loss = 2.7478e-03, PNorm = 151.9345, GNorm = 0.1420, lr_0 = 2.3810e-04
Loss = 3.8525e-03, PNorm = 151.9367, GNorm = 0.4280, lr_0 = 2.3794e-04
Loss = 4.1151e-03, PNorm = 151.9396, GNorm = 0.2093, lr_0 = 2.3777e-04
Loss = 2.9007e-03, PNorm = 151.9458, GNorm = 0.2265, lr_0 = 2.3761e-04
Loss = 2.8673e-03, PNorm = 151.9557, GNorm = 0.0490, lr_0 = 2.3745e-04
Loss = 2.6474e-03, PNorm = 151.9632, GNorm = 0.1089, lr_0 = 2.3728e-04
Loss = 4.0529e-03, PNorm = 151.9679, GNorm = 0.0984, lr_0 = 2.3712e-04
Loss = 2.5596e-03, PNorm = 151.9744, GNorm = 0.0933, lr_0 = 2.3696e-04
Loss = 2.5025e-03, PNorm = 151.9805, GNorm = 0.0806, lr_0 = 2.3680e-04
Loss = 2.1930e-03, PNorm = 151.9868, GNorm = 0.1186, lr_0 = 2.3663e-04
Loss = 3.0664e-03, PNorm = 151.9961, GNorm = 0.2491, lr_0 = 2.3647e-04
Loss = 3.5964e-03, PNorm = 152.0034, GNorm = 0.2218, lr_0 = 2.3631e-04
Loss = 2.3523e-03, PNorm = 152.0070, GNorm = 0.1741, lr_0 = 2.3615e-04
Loss = 4.1741e-03, PNorm = 152.0105, GNorm = 0.1066, lr_0 = 2.3599e-04
Loss = 3.2884e-03, PNorm = 152.0154, GNorm = 0.1796, lr_0 = 2.3582e-04
Loss = 2.7172e-03, PNorm = 152.0204, GNorm = 0.0787, lr_0 = 2.3566e-04
Loss = 3.3936e-03, PNorm = 152.0271, GNorm = 0.1436, lr_0 = 2.3550e-04
Loss = 2.9632e-03, PNorm = 152.0337, GNorm = 0.2377, lr_0 = 2.3534e-04
Loss = 2.5405e-03, PNorm = 152.0400, GNorm = 0.0793, lr_0 = 2.3518e-04
Loss = 3.0227e-03, PNorm = 152.0452, GNorm = 0.0672, lr_0 = 2.3502e-04
Loss = 2.8536e-03, PNorm = 152.0505, GNorm = 0.2050, lr_0 = 2.3486e-04
Loss = 5.2568e-03, PNorm = 152.0562, GNorm = 0.0639, lr_0 = 2.3470e-04
Loss = 3.4384e-03, PNorm = 152.0635, GNorm = 0.1487, lr_0 = 2.3454e-04
Loss = 2.4472e-03, PNorm = 152.0725, GNorm = 0.1183, lr_0 = 2.3437e-04
Loss = 2.2506e-03, PNorm = 152.0811, GNorm = 0.1612, lr_0 = 2.3421e-04
Loss = 2.7160e-03, PNorm = 152.0871, GNorm = 0.1232, lr_0 = 2.3405e-04
Loss = 4.0165e-03, PNorm = 152.0919, GNorm = 0.1116, lr_0 = 2.3389e-04
Loss = 2.2726e-03, PNorm = 152.0957, GNorm = 0.1269, lr_0 = 2.3373e-04
Loss = 5.3086e-03, PNorm = 152.1007, GNorm = 0.5040, lr_0 = 2.3357e-04
Loss = 2.8883e-03, PNorm = 152.1086, GNorm = 0.2170, lr_0 = 2.3341e-04
Loss = 4.5562e-03, PNorm = 152.1164, GNorm = 0.2363, lr_0 = 2.3325e-04
Loss = 2.1793e-03, PNorm = 152.1214, GNorm = 0.3098, lr_0 = 2.3309e-04
Loss = 4.3059e-03, PNorm = 152.1247, GNorm = 0.1317, lr_0 = 2.3293e-04
Loss = 2.7079e-03, PNorm = 152.1300, GNorm = 0.0689, lr_0 = 2.3277e-04
Loss = 2.5752e-03, PNorm = 152.1359, GNorm = 0.0715, lr_0 = 2.3261e-04
Loss = 4.3200e-03, PNorm = 152.1406, GNorm = 0.1334, lr_0 = 2.3246e-04
Loss = 3.3794e-03, PNorm = 152.1469, GNorm = 0.1841, lr_0 = 2.3230e-04
Loss = 2.7477e-03, PNorm = 152.1531, GNorm = 0.0854, lr_0 = 2.3214e-04
Loss = 2.8278e-03, PNorm = 152.1605, GNorm = 0.1644, lr_0 = 2.3198e-04
Loss = 2.2296e-03, PNorm = 152.1665, GNorm = 0.1650, lr_0 = 2.3182e-04
Loss = 2.1547e-03, PNorm = 152.1705, GNorm = 0.2182, lr_0 = 2.3166e-04
Loss = 2.9559e-03, PNorm = 152.1764, GNorm = 0.1155, lr_0 = 2.3150e-04
Loss = 2.2376e-03, PNorm = 152.1819, GNorm = 0.3068, lr_0 = 2.3134e-04
Loss = 2.1894e-03, PNorm = 152.1853, GNorm = 0.1001, lr_0 = 2.3118e-04
Loss = 2.1754e-03, PNorm = 152.1885, GNorm = 0.1772, lr_0 = 2.3103e-04
Loss = 2.4170e-03, PNorm = 152.1956, GNorm = 0.1253, lr_0 = 2.3087e-04
Loss = 2.2815e-03, PNorm = 152.2042, GNorm = 0.0910, lr_0 = 2.3071e-04
Loss = 2.6572e-03, PNorm = 152.2123, GNorm = 0.0679, lr_0 = 2.3055e-04
Loss = 2.2401e-03, PNorm = 152.2155, GNorm = 0.1167, lr_0 = 2.3039e-04
Loss = 2.2901e-03, PNorm = 152.2183, GNorm = 0.1764, lr_0 = 2.3024e-04
Loss = 2.5309e-03, PNorm = 152.2256, GNorm = 0.0788, lr_0 = 2.3008e-04
Loss = 3.1183e-03, PNorm = 152.2308, GNorm = 0.1044, lr_0 = 2.2992e-04
Loss = 6.8247e-03, PNorm = 152.2377, GNorm = 0.0911, lr_0 = 2.2976e-04
Loss = 3.2654e-03, PNorm = 152.2428, GNorm = 0.2360, lr_0 = 2.2961e-04
Loss = 2.6207e-03, PNorm = 152.2505, GNorm = 0.2941, lr_0 = 2.2945e-04
Loss = 3.1606e-03, PNorm = 152.2583, GNorm = 0.1370, lr_0 = 2.2929e-04
Loss = 4.0851e-03, PNorm = 152.2620, GNorm = 0.2155, lr_0 = 2.2913e-04
Loss = 2.1760e-03, PNorm = 152.2686, GNorm = 0.0634, lr_0 = 2.2898e-04
Loss = 3.8940e-03, PNorm = 152.2743, GNorm = 0.1139, lr_0 = 2.2882e-04
Loss = 4.4591e-03, PNorm = 152.2831, GNorm = 0.2070, lr_0 = 2.2866e-04
Loss = 3.9099e-03, PNorm = 152.2892, GNorm = 0.3428, lr_0 = 2.2851e-04
Loss = 3.3458e-03, PNorm = 152.2932, GNorm = 0.2855, lr_0 = 2.2835e-04
Loss = 3.3093e-03, PNorm = 152.2956, GNorm = 0.1476, lr_0 = 2.2819e-04
Loss = 2.7405e-03, PNorm = 152.3055, GNorm = 0.1289, lr_0 = 2.2804e-04
Loss = 3.4588e-03, PNorm = 152.3125, GNorm = 0.1257, lr_0 = 2.2788e-04
Loss = 3.0403e-03, PNorm = 152.3183, GNorm = 0.1702, lr_0 = 2.2773e-04
Loss = 4.0807e-03, PNorm = 152.3250, GNorm = 0.1776, lr_0 = 2.2757e-04
Validation mae = 0.476339
Epoch 20
Loss = 2.0477e-03, PNorm = 152.3300, GNorm = 0.0683, lr_0 = 2.2741e-04
Loss = 2.2728e-03, PNorm = 152.3347, GNorm = 0.1763, lr_0 = 2.2726e-04
Loss = 2.3598e-03, PNorm = 152.3413, GNorm = 0.1217, lr_0 = 2.2710e-04
Loss = 2.6301e-03, PNorm = 152.3459, GNorm = 0.1430, lr_0 = 2.2695e-04
Loss = 2.3492e-03, PNorm = 152.3483, GNorm = 0.1395, lr_0 = 2.2679e-04
Loss = 2.6382e-03, PNorm = 152.3522, GNorm = 0.3641, lr_0 = 2.2664e-04
Loss = 2.9306e-03, PNorm = 152.3547, GNorm = 0.1206, lr_0 = 2.2648e-04
Loss = 1.9619e-03, PNorm = 152.3589, GNorm = 0.1333, lr_0 = 2.2632e-04
Loss = 2.9335e-03, PNorm = 152.3665, GNorm = 0.0797, lr_0 = 2.2617e-04
Loss = 2.2957e-03, PNorm = 152.3721, GNorm = 0.2428, lr_0 = 2.2601e-04
Loss = 2.1013e-03, PNorm = 152.3768, GNorm = 0.1245, lr_0 = 2.2586e-04
Loss = 3.0373e-03, PNorm = 152.3830, GNorm = 0.1134, lr_0 = 2.2571e-04
Loss = 2.7486e-03, PNorm = 152.3865, GNorm = 0.2196, lr_0 = 2.2555e-04
Loss = 1.8771e-03, PNorm = 152.3907, GNorm = 0.1346, lr_0 = 2.2540e-04
Loss = 2.0831e-03, PNorm = 152.3940, GNorm = 0.0882, lr_0 = 2.2524e-04
Loss = 2.6717e-03, PNorm = 152.3970, GNorm = 0.1665, lr_0 = 2.2509e-04
Loss = 3.7954e-03, PNorm = 152.4010, GNorm = 0.0995, lr_0 = 2.2493e-04
Loss = 2.5413e-03, PNorm = 152.4065, GNorm = 0.0998, lr_0 = 2.2478e-04
Loss = 2.9938e-03, PNorm = 152.4093, GNorm = 0.2071, lr_0 = 2.2463e-04
Loss = 2.0530e-03, PNorm = 152.4125, GNorm = 0.0761, lr_0 = 2.2447e-04
Loss = 4.2488e-03, PNorm = 152.4168, GNorm = 0.1327, lr_0 = 2.2432e-04
Loss = 2.1758e-03, PNorm = 152.4220, GNorm = 0.1587, lr_0 = 2.2416e-04
Loss = 2.2713e-03, PNorm = 152.4283, GNorm = 0.2666, lr_0 = 2.2401e-04
Loss = 2.4499e-03, PNorm = 152.4306, GNorm = 0.0692, lr_0 = 2.2386e-04
Loss = 3.3564e-03, PNorm = 152.4338, GNorm = 0.4522, lr_0 = 2.2370e-04
Loss = 5.1909e-03, PNorm = 152.4406, GNorm = 0.0918, lr_0 = 2.2355e-04
Loss = 2.5493e-03, PNorm = 152.4454, GNorm = 0.1257, lr_0 = 2.2340e-04
Loss = 2.3862e-03, PNorm = 152.4483, GNorm = 0.1848, lr_0 = 2.2324e-04
Loss = 2.5607e-03, PNorm = 152.4548, GNorm = 0.1341, lr_0 = 2.2309e-04
Loss = 2.6017e-03, PNorm = 152.4600, GNorm = 0.0754, lr_0 = 2.2294e-04
Loss = 3.4386e-03, PNorm = 152.4635, GNorm = 0.0427, lr_0 = 2.2279e-04
Loss = 3.6731e-03, PNorm = 152.4695, GNorm = 0.1304, lr_0 = 2.2263e-04
Loss = 2.4927e-03, PNorm = 152.4721, GNorm = 0.1814, lr_0 = 2.2248e-04
Loss = 2.1480e-03, PNorm = 152.4775, GNorm = 0.1275, lr_0 = 2.2233e-04
Loss = 2.6104e-03, PNorm = 152.4820, GNorm = 0.0512, lr_0 = 2.2218e-04
Loss = 2.9184e-03, PNorm = 152.4851, GNorm = 0.1466, lr_0 = 2.2202e-04
Loss = 1.9239e-03, PNorm = 152.4888, GNorm = 0.0471, lr_0 = 2.2187e-04
Loss = 2.4685e-03, PNorm = 152.4934, GNorm = 0.1317, lr_0 = 2.2172e-04
Loss = 2.0836e-03, PNorm = 152.4986, GNorm = 0.0711, lr_0 = 2.2157e-04
Loss = 2.2066e-03, PNorm = 152.5007, GNorm = 0.1131, lr_0 = 2.2142e-04
Loss = 3.0409e-03, PNorm = 152.5033, GNorm = 0.1652, lr_0 = 2.2126e-04
Loss = 2.0288e-03, PNorm = 152.5054, GNorm = 0.0667, lr_0 = 2.2111e-04
Loss = 1.8298e-03, PNorm = 152.5097, GNorm = 0.1133, lr_0 = 2.2096e-04
Loss = 2.9616e-03, PNorm = 152.5167, GNorm = 0.3061, lr_0 = 2.2081e-04
Loss = 1.9156e-03, PNorm = 152.5208, GNorm = 0.0822, lr_0 = 2.2066e-04
Loss = 1.8340e-03, PNorm = 152.5263, GNorm = 0.1280, lr_0 = 2.2051e-04
Loss = 2.7884e-03, PNorm = 152.5315, GNorm = 0.0918, lr_0 = 2.2036e-04
Loss = 1.8881e-03, PNorm = 152.5362, GNorm = 0.1451, lr_0 = 2.2021e-04
Loss = 2.4773e-03, PNorm = 152.5398, GNorm = 0.1248, lr_0 = 2.2005e-04
Loss = 2.3817e-03, PNorm = 152.5422, GNorm = 0.0997, lr_0 = 2.1990e-04
Loss = 1.9040e-03, PNorm = 152.5476, GNorm = 0.1262, lr_0 = 2.1975e-04
Loss = 1.8301e-03, PNorm = 152.5510, GNorm = 0.0817, lr_0 = 2.1960e-04
Loss = 3.0626e-03, PNorm = 152.5551, GNorm = 0.0842, lr_0 = 2.1945e-04
Loss = 3.6793e-03, PNorm = 152.5618, GNorm = 0.1312, lr_0 = 2.1930e-04
Loss = 3.3875e-03, PNorm = 152.5687, GNorm = 0.0986, lr_0 = 2.1915e-04
Loss = 1.8367e-03, PNorm = 152.5721, GNorm = 0.2515, lr_0 = 2.1900e-04
Loss = 1.5908e-03, PNorm = 152.5764, GNorm = 0.0959, lr_0 = 2.1885e-04
Loss = 1.9822e-03, PNorm = 152.5818, GNorm = 0.0621, lr_0 = 2.1870e-04
Loss = 2.3161e-03, PNorm = 152.5854, GNorm = 0.0942, lr_0 = 2.1855e-04
Loss = 1.8668e-03, PNorm = 152.5893, GNorm = 0.0791, lr_0 = 2.1840e-04
Loss = 3.8844e-03, PNorm = 152.5949, GNorm = 0.2273, lr_0 = 2.1825e-04
Loss = 2.8280e-03, PNorm = 152.6003, GNorm = 0.2173, lr_0 = 2.1810e-04
Loss = 1.7159e-03, PNorm = 152.6072, GNorm = 0.0413, lr_0 = 2.1795e-04
Loss = 2.0320e-03, PNorm = 152.6102, GNorm = 0.1805, lr_0 = 2.1780e-04
Loss = 3.4741e-03, PNorm = 152.6140, GNorm = 0.1064, lr_0 = 2.1765e-04
Loss = 2.4578e-03, PNorm = 152.6187, GNorm = 0.1700, lr_0 = 2.1751e-04
Loss = 2.1192e-03, PNorm = 152.6246, GNorm = 0.2007, lr_0 = 2.1736e-04
Loss = 2.9251e-03, PNorm = 152.6311, GNorm = 0.2713, lr_0 = 2.1721e-04
Loss = 1.8512e-03, PNorm = 152.6379, GNorm = 0.1057, lr_0 = 2.1706e-04
Loss = 2.1699e-03, PNorm = 152.6440, GNorm = 0.1151, lr_0 = 2.1691e-04
Loss = 2.6319e-03, PNorm = 152.6504, GNorm = 0.2942, lr_0 = 2.1676e-04
Loss = 2.0725e-03, PNorm = 152.6550, GNorm = 0.0782, lr_0 = 2.1661e-04
Loss = 2.2839e-03, PNorm = 152.6601, GNorm = 0.1038, lr_0 = 2.1646e-04
Loss = 2.4089e-03, PNorm = 152.6655, GNorm = 0.0501, lr_0 = 2.1632e-04
Loss = 4.1596e-03, PNorm = 152.6696, GNorm = 0.0886, lr_0 = 2.1617e-04
Loss = 2.6932e-03, PNorm = 152.6750, GNorm = 0.0723, lr_0 = 2.1602e-04
Loss = 2.6233e-03, PNorm = 152.6784, GNorm = 0.2320, lr_0 = 2.1587e-04
Loss = 2.6792e-03, PNorm = 152.6805, GNorm = 0.2717, lr_0 = 2.1572e-04
Loss = 2.3536e-03, PNorm = 152.6854, GNorm = 0.1565, lr_0 = 2.1558e-04
Loss = 2.5040e-03, PNorm = 152.6921, GNorm = 0.0913, lr_0 = 2.1543e-04
Loss = 3.1544e-03, PNorm = 152.6977, GNorm = 0.4686, lr_0 = 2.1528e-04
Loss = 2.1829e-03, PNorm = 152.7040, GNorm = 0.2323, lr_0 = 2.1513e-04
Loss = 2.0174e-03, PNorm = 152.7084, GNorm = 0.0670, lr_0 = 2.1499e-04
Loss = 3.1481e-03, PNorm = 152.7125, GNorm = 0.5322, lr_0 = 2.1484e-04
Loss = 2.2399e-03, PNorm = 152.7199, GNorm = 0.2410, lr_0 = 2.1469e-04
Loss = 1.9686e-03, PNorm = 152.7251, GNorm = 0.1480, lr_0 = 2.1454e-04
Loss = 3.0398e-03, PNorm = 152.7315, GNorm = 0.2301, lr_0 = 2.1440e-04
Loss = 2.5645e-03, PNorm = 152.7377, GNorm = 0.1196, lr_0 = 2.1425e-04
Loss = 2.7453e-03, PNorm = 152.7413, GNorm = 0.2991, lr_0 = 2.1410e-04
Loss = 3.3403e-03, PNorm = 152.7462, GNorm = 0.3821, lr_0 = 2.1396e-04
Loss = 3.2023e-03, PNorm = 152.7476, GNorm = 0.2441, lr_0 = 2.1381e-04
Loss = 2.8650e-03, PNorm = 152.7526, GNorm = 0.2016, lr_0 = 2.1366e-04
Loss = 3.8750e-03, PNorm = 152.7581, GNorm = 0.4332, lr_0 = 2.1352e-04
Loss = 2.5727e-03, PNorm = 152.7640, GNorm = 0.2409, lr_0 = 2.1337e-04
Loss = 2.0263e-03, PNorm = 152.7659, GNorm = 0.2776, lr_0 = 2.1323e-04
Loss = 3.3446e-03, PNorm = 152.7674, GNorm = 0.1038, lr_0 = 2.1308e-04
Loss = 2.3253e-03, PNorm = 152.7710, GNorm = 0.1880, lr_0 = 2.1293e-04
Loss = 1.9876e-03, PNorm = 152.7760, GNorm = 0.0620, lr_0 = 2.1279e-04
Loss = 3.1089e-03, PNorm = 152.7859, GNorm = 0.2125, lr_0 = 2.1264e-04
Loss = 2.9950e-03, PNorm = 152.7928, GNorm = 0.0912, lr_0 = 2.1250e-04
Loss = 3.2676e-03, PNorm = 152.7985, GNorm = 0.2585, lr_0 = 2.1235e-04
Loss = 4.4250e-03, PNorm = 152.8016, GNorm = 0.1581, lr_0 = 2.1221e-04
Loss = 2.3592e-03, PNorm = 152.8052, GNorm = 0.1642, lr_0 = 2.1206e-04
Loss = 4.3031e-03, PNorm = 152.8114, GNorm = 0.1159, lr_0 = 2.1191e-04
Loss = 2.4287e-03, PNorm = 152.8185, GNorm = 0.4744, lr_0 = 2.1177e-04
Loss = 1.9692e-03, PNorm = 152.8247, GNorm = 0.1751, lr_0 = 2.1162e-04
Loss = 3.4920e-03, PNorm = 152.8319, GNorm = 0.2102, lr_0 = 2.1148e-04
Loss = 4.0493e-03, PNorm = 152.8360, GNorm = 0.2287, lr_0 = 2.1133e-04
Loss = 2.3626e-03, PNorm = 152.8398, GNorm = 0.1191, lr_0 = 2.1119e-04
Loss = 3.3240e-03, PNorm = 152.8452, GNorm = 0.1671, lr_0 = 2.1104e-04
Loss = 2.9888e-03, PNorm = 152.8527, GNorm = 0.1876, lr_0 = 2.1090e-04
Loss = 5.6953e-03, PNorm = 152.8566, GNorm = 0.0606, lr_0 = 2.1076e-04
Loss = 4.7332e-03, PNorm = 152.8615, GNorm = 0.1718, lr_0 = 2.1061e-04
Loss = 2.2032e-03, PNorm = 152.8674, GNorm = 0.2959, lr_0 = 2.1047e-04
Loss = 2.0762e-03, PNorm = 152.8733, GNorm = 0.1785, lr_0 = 2.1032e-04
Loss = 2.1377e-03, PNorm = 152.8771, GNorm = 0.1466, lr_0 = 2.1018e-04
Loss = 2.3545e-03, PNorm = 152.8804, GNorm = 0.2663, lr_0 = 2.1003e-04
Loss = 2.8223e-03, PNorm = 152.8845, GNorm = 0.2070, lr_0 = 2.0989e-04
Loss = 3.2198e-03, PNorm = 152.8918, GNorm = 0.0995, lr_0 = 2.0975e-04
Loss = 2.9062e-03, PNorm = 152.8978, GNorm = 0.3257, lr_0 = 2.0960e-04
Validation mae = 0.476520
Epoch 21
Loss = 1.9902e-03, PNorm = 152.9011, GNorm = 0.3160, lr_0 = 2.0946e-04
Loss = 2.5138e-03, PNorm = 152.9022, GNorm = 0.0526, lr_0 = 2.0932e-04
Loss = 2.2722e-03, PNorm = 152.9058, GNorm = 0.0877, lr_0 = 2.0917e-04
Loss = 2.5258e-03, PNorm = 152.9082, GNorm = 0.0662, lr_0 = 2.0903e-04
Loss = 3.6415e-03, PNorm = 152.9107, GNorm = 0.1086, lr_0 = 2.0889e-04
Loss = 1.9102e-03, PNorm = 152.9158, GNorm = 0.0980, lr_0 = 2.0874e-04
Loss = 2.2391e-03, PNorm = 152.9205, GNorm = 0.1420, lr_0 = 2.0860e-04
Loss = 4.4912e-03, PNorm = 152.9282, GNorm = 0.1571, lr_0 = 2.0846e-04
Loss = 2.2219e-03, PNorm = 152.9315, GNorm = 0.1629, lr_0 = 2.0831e-04
Loss = 1.9278e-03, PNorm = 152.9354, GNorm = 0.2205, lr_0 = 2.0817e-04
Loss = 2.1374e-03, PNorm = 152.9376, GNorm = 0.1203, lr_0 = 2.0803e-04
Loss = 1.7988e-03, PNorm = 152.9403, GNorm = 0.1290, lr_0 = 2.0789e-04
Loss = 1.9612e-03, PNorm = 152.9446, GNorm = 0.2211, lr_0 = 2.0774e-04
Loss = 2.5526e-03, PNorm = 152.9501, GNorm = 0.1065, lr_0 = 2.0760e-04
Loss = 1.9840e-03, PNorm = 152.9554, GNorm = 0.1731, lr_0 = 2.0746e-04
Loss = 4.8371e-03, PNorm = 152.9603, GNorm = 0.4537, lr_0 = 2.0732e-04
Loss = 1.8623e-03, PNorm = 152.9674, GNorm = 0.0831, lr_0 = 2.0718e-04
Loss = 2.0090e-03, PNorm = 152.9710, GNorm = 0.1323, lr_0 = 2.0703e-04
Loss = 1.7047e-03, PNorm = 152.9740, GNorm = 0.2280, lr_0 = 2.0689e-04
Loss = 2.4513e-03, PNorm = 152.9773, GNorm = 0.2146, lr_0 = 2.0675e-04
Loss = 1.4912e-03, PNorm = 152.9840, GNorm = 0.2094, lr_0 = 2.0661e-04
Loss = 1.7717e-03, PNorm = 152.9896, GNorm = 0.1705, lr_0 = 2.0647e-04
Loss = 2.1979e-03, PNorm = 152.9939, GNorm = 0.1245, lr_0 = 2.0633e-04
Loss = 1.5594e-03, PNorm = 152.9962, GNorm = 0.1323, lr_0 = 2.0618e-04
Loss = 1.6871e-03, PNorm = 153.0000, GNorm = 0.0951, lr_0 = 2.0604e-04
Loss = 2.2982e-03, PNorm = 153.0043, GNorm = 0.1055, lr_0 = 2.0590e-04
Loss = 2.4057e-03, PNorm = 153.0066, GNorm = 0.1490, lr_0 = 2.0576e-04
Loss = 2.0139e-03, PNorm = 153.0098, GNorm = 0.1194, lr_0 = 2.0562e-04
Loss = 2.8292e-03, PNorm = 153.0136, GNorm = 0.0678, lr_0 = 2.0548e-04
Loss = 4.1698e-03, PNorm = 153.0177, GNorm = 0.1135, lr_0 = 2.0534e-04
Loss = 4.4680e-03, PNorm = 153.0225, GNorm = 0.1461, lr_0 = 2.0520e-04
Loss = 1.5146e-03, PNorm = 153.0253, GNorm = 0.1403, lr_0 = 2.0506e-04
Loss = 2.4806e-03, PNorm = 153.0280, GNorm = 0.1066, lr_0 = 2.0492e-04
Loss = 2.8753e-03, PNorm = 153.0324, GNorm = 0.1078, lr_0 = 2.0478e-04
Loss = 1.4392e-03, PNorm = 153.0369, GNorm = 0.2032, lr_0 = 2.0464e-04
Loss = 2.0182e-03, PNorm = 153.0393, GNorm = 0.0617, lr_0 = 2.0450e-04
Loss = 1.7658e-03, PNorm = 153.0432, GNorm = 0.1156, lr_0 = 2.0436e-04
Loss = 1.6054e-03, PNorm = 153.0456, GNorm = 0.1340, lr_0 = 2.0422e-04
Loss = 1.8653e-03, PNorm = 153.0502, GNorm = 0.0939, lr_0 = 2.0408e-04
Loss = 1.6673e-03, PNorm = 153.0559, GNorm = 0.1794, lr_0 = 2.0394e-04
Loss = 1.9582e-03, PNorm = 153.0596, GNorm = 0.0932, lr_0 = 2.0380e-04
Loss = 2.2654e-03, PNorm = 153.0643, GNorm = 0.0708, lr_0 = 2.0366e-04
Loss = 3.2240e-03, PNorm = 153.0682, GNorm = 0.1103, lr_0 = 2.0352e-04
Loss = 2.3458e-03, PNorm = 153.0732, GNorm = 0.0580, lr_0 = 2.0338e-04
Loss = 2.9257e-03, PNorm = 153.0781, GNorm = 0.2883, lr_0 = 2.0324e-04
Loss = 1.5867e-03, PNorm = 153.0823, GNorm = 0.1109, lr_0 = 2.0310e-04
Loss = 1.9454e-03, PNorm = 153.0846, GNorm = 0.1495, lr_0 = 2.0296e-04
Loss = 2.7699e-03, PNorm = 153.0863, GNorm = 0.3851, lr_0 = 2.0282e-04
Loss = 3.3085e-03, PNorm = 153.0920, GNorm = 0.1818, lr_0 = 2.0268e-04
Loss = 1.7406e-03, PNorm = 153.0986, GNorm = 0.1958, lr_0 = 2.0254e-04
Loss = 2.4187e-03, PNorm = 153.1018, GNorm = 0.1351, lr_0 = 2.0240e-04
Loss = 1.7186e-03, PNorm = 153.1063, GNorm = 0.1067, lr_0 = 2.0227e-04
Loss = 1.8641e-03, PNorm = 153.1097, GNorm = 0.2230, lr_0 = 2.0213e-04
Loss = 1.9819e-03, PNorm = 153.1143, GNorm = 0.1807, lr_0 = 2.0199e-04
Loss = 2.1489e-03, PNorm = 153.1186, GNorm = 0.0983, lr_0 = 2.0185e-04
Loss = 1.7698e-03, PNorm = 153.1228, GNorm = 0.1351, lr_0 = 2.0171e-04
Loss = 1.6675e-03, PNorm = 153.1262, GNorm = 0.1513, lr_0 = 2.0157e-04
Loss = 1.8098e-03, PNorm = 153.1316, GNorm = 0.0729, lr_0 = 2.0144e-04
Loss = 3.3535e-03, PNorm = 153.1372, GNorm = 0.4489, lr_0 = 2.0130e-04
Loss = 1.6613e-03, PNorm = 153.1381, GNorm = 0.2116, lr_0 = 2.0116e-04
Loss = 1.6266e-03, PNorm = 153.1405, GNorm = 0.1306, lr_0 = 2.0102e-04
Loss = 1.4052e-03, PNorm = 153.1440, GNorm = 0.0921, lr_0 = 2.0088e-04
Loss = 2.6625e-03, PNorm = 153.1495, GNorm = 0.1256, lr_0 = 2.0075e-04
Loss = 2.5044e-03, PNorm = 153.1547, GNorm = 0.1066, lr_0 = 2.0061e-04
Loss = 2.9003e-03, PNorm = 153.1577, GNorm = 0.2404, lr_0 = 2.0047e-04
Loss = 2.8607e-03, PNorm = 153.1614, GNorm = 0.1403, lr_0 = 2.0033e-04
Loss = 2.9885e-03, PNorm = 153.1632, GNorm = 0.1380, lr_0 = 2.0020e-04
Loss = 2.1685e-03, PNorm = 153.1681, GNorm = 0.3275, lr_0 = 2.0006e-04
Loss = 1.7005e-03, PNorm = 153.1718, GNorm = 0.1543, lr_0 = 1.9992e-04
Loss = 2.1527e-03, PNorm = 153.1789, GNorm = 0.1051, lr_0 = 1.9979e-04
Loss = 2.0207e-03, PNorm = 153.1819, GNorm = 0.1665, lr_0 = 1.9965e-04
Loss = 2.9203e-03, PNorm = 153.1876, GNorm = 0.1370, lr_0 = 1.9951e-04
Loss = 1.9356e-03, PNorm = 153.1926, GNorm = 0.1415, lr_0 = 1.9938e-04
Loss = 1.7824e-03, PNorm = 153.1964, GNorm = 0.0398, lr_0 = 1.9924e-04
Loss = 2.4449e-03, PNorm = 153.2010, GNorm = 0.2698, lr_0 = 1.9910e-04
Loss = 2.1713e-03, PNorm = 153.2051, GNorm = 0.0322, lr_0 = 1.9897e-04
Loss = 2.0636e-03, PNorm = 153.2095, GNorm = 0.0760, lr_0 = 1.9883e-04
Loss = 1.6808e-03, PNorm = 153.2142, GNorm = 0.1116, lr_0 = 1.9869e-04
Loss = 2.5198e-03, PNorm = 153.2185, GNorm = 0.3841, lr_0 = 1.9856e-04
Loss = 1.9443e-03, PNorm = 153.2216, GNorm = 0.0983, lr_0 = 1.9842e-04
Loss = 1.5437e-03, PNorm = 153.2257, GNorm = 0.0847, lr_0 = 1.9829e-04
Loss = 1.5759e-03, PNorm = 153.2285, GNorm = 0.0990, lr_0 = 1.9815e-04
Loss = 2.0528e-03, PNorm = 153.2331, GNorm = 0.1610, lr_0 = 1.9801e-04
Loss = 2.3495e-03, PNorm = 153.2368, GNorm = 0.2884, lr_0 = 1.9788e-04
Loss = 2.6427e-03, PNorm = 153.2414, GNorm = 0.1772, lr_0 = 1.9774e-04
Loss = 2.4161e-03, PNorm = 153.2466, GNorm = 0.1210, lr_0 = 1.9761e-04
Loss = 2.1506e-03, PNorm = 153.2485, GNorm = 0.1577, lr_0 = 1.9747e-04
Loss = 1.6600e-03, PNorm = 153.2505, GNorm = 0.1312, lr_0 = 1.9734e-04
Loss = 1.7371e-03, PNorm = 153.2543, GNorm = 0.0806, lr_0 = 1.9720e-04
Loss = 1.8269e-03, PNorm = 153.2578, GNorm = 0.0729, lr_0 = 1.9707e-04
Loss = 1.6496e-03, PNorm = 153.2629, GNorm = 0.1903, lr_0 = 1.9693e-04
Loss = 1.7055e-03, PNorm = 153.2676, GNorm = 0.0605, lr_0 = 1.9680e-04
Loss = 2.8471e-03, PNorm = 153.2721, GNorm = 0.0798, lr_0 = 1.9666e-04
Loss = 2.8319e-03, PNorm = 153.2752, GNorm = 0.1872, lr_0 = 1.9653e-04
Loss = 4.4865e-03, PNorm = 153.2781, GNorm = 0.0338, lr_0 = 1.9639e-04
Loss = 1.8328e-03, PNorm = 153.2820, GNorm = 0.0588, lr_0 = 1.9626e-04
Loss = 2.4989e-03, PNorm = 153.2892, GNorm = 0.0909, lr_0 = 1.9612e-04
Loss = 1.7853e-03, PNorm = 153.2937, GNorm = 0.0973, lr_0 = 1.9599e-04
Loss = 2.2095e-03, PNorm = 153.2988, GNorm = 0.0846, lr_0 = 1.9585e-04
Loss = 4.4299e-03, PNorm = 153.3047, GNorm = 0.1989, lr_0 = 1.9572e-04
Loss = 3.9413e-03, PNorm = 153.3108, GNorm = 0.0932, lr_0 = 1.9559e-04
Loss = 1.9235e-03, PNorm = 153.3162, GNorm = 0.0755, lr_0 = 1.9545e-04
Loss = 3.1336e-03, PNorm = 153.3173, GNorm = 0.0988, lr_0 = 1.9532e-04
Loss = 3.6478e-03, PNorm = 153.3207, GNorm = 0.2927, lr_0 = 1.9518e-04
Loss = 2.0049e-03, PNorm = 153.3239, GNorm = 0.0874, lr_0 = 1.9505e-04
Loss = 1.8494e-03, PNorm = 153.3262, GNorm = 0.2607, lr_0 = 1.9492e-04
Loss = 3.6977e-03, PNorm = 153.3306, GNorm = 0.2798, lr_0 = 1.9478e-04
Loss = 2.2934e-03, PNorm = 153.3331, GNorm = 0.1570, lr_0 = 1.9465e-04
Loss = 5.6317e-03, PNorm = 153.3363, GNorm = 0.0737, lr_0 = 1.9452e-04
Loss = 1.5795e-03, PNorm = 153.3406, GNorm = 0.0670, lr_0 = 1.9438e-04
Loss = 2.6142e-03, PNorm = 153.3463, GNorm = 0.1196, lr_0 = 1.9425e-04
Loss = 1.9458e-03, PNorm = 153.3514, GNorm = 0.0967, lr_0 = 1.9412e-04
Loss = 1.8378e-03, PNorm = 153.3525, GNorm = 0.2173, lr_0 = 1.9398e-04
Loss = 1.8016e-03, PNorm = 153.3565, GNorm = 0.1099, lr_0 = 1.9385e-04
Loss = 2.1097e-03, PNorm = 153.3611, GNorm = 0.0595, lr_0 = 1.9372e-04
Loss = 2.4518e-03, PNorm = 153.3654, GNorm = 0.1256, lr_0 = 1.9359e-04
Loss = 2.7304e-03, PNorm = 153.3719, GNorm = 0.1456, lr_0 = 1.9345e-04
Loss = 3.4914e-03, PNorm = 153.3769, GNorm = 0.1274, lr_0 = 1.9332e-04
Loss = 2.3001e-03, PNorm = 153.3798, GNorm = 0.1199, lr_0 = 1.9319e-04
Loss = 3.7165e-03, PNorm = 153.3809, GNorm = 0.2813, lr_0 = 1.9306e-04
Validation mae = 0.476272
Epoch 22
Loss = 1.8041e-03, PNorm = 153.3841, GNorm = 0.1415, lr_0 = 1.9292e-04
Loss = 1.4480e-03, PNorm = 153.3850, GNorm = 0.1567, lr_0 = 1.9279e-04
Loss = 2.7488e-03, PNorm = 153.3894, GNorm = 0.1377, lr_0 = 1.9266e-04
Loss = 1.6210e-03, PNorm = 153.3950, GNorm = 0.0787, lr_0 = 1.9253e-04
Loss = 1.6645e-03, PNorm = 153.3988, GNorm = 0.1128, lr_0 = 1.9240e-04
Loss = 1.8712e-03, PNorm = 153.4041, GNorm = 0.0613, lr_0 = 1.9226e-04
Loss = 1.6538e-03, PNorm = 153.4070, GNorm = 0.1869, lr_0 = 1.9213e-04
Loss = 2.5303e-03, PNorm = 153.4091, GNorm = 0.1910, lr_0 = 1.9200e-04
Loss = 2.3820e-03, PNorm = 153.4104, GNorm = 0.1245, lr_0 = 1.9187e-04
Loss = 2.0754e-03, PNorm = 153.4136, GNorm = 0.1341, lr_0 = 1.9174e-04
Loss = 2.1307e-03, PNorm = 153.4175, GNorm = 0.3400, lr_0 = 1.9161e-04
Loss = 4.2022e-03, PNorm = 153.4209, GNorm = 0.0972, lr_0 = 1.9148e-04
Loss = 1.3079e-03, PNorm = 153.4232, GNorm = 0.0915, lr_0 = 1.9134e-04
Loss = 2.2059e-03, PNorm = 153.4255, GNorm = 0.1679, lr_0 = 1.9121e-04
Loss = 1.6327e-03, PNorm = 153.4303, GNorm = 0.0728, lr_0 = 1.9108e-04
Loss = 2.1862e-03, PNorm = 153.4351, GNorm = 0.0533, lr_0 = 1.9095e-04
Loss = 1.2936e-03, PNorm = 153.4379, GNorm = 0.1048, lr_0 = 1.9082e-04
Loss = 1.3830e-03, PNorm = 153.4417, GNorm = 0.0396, lr_0 = 1.9069e-04
Loss = 1.1528e-03, PNorm = 153.4419, GNorm = 0.0746, lr_0 = 1.9056e-04
Loss = 1.2906e-03, PNorm = 153.4423, GNorm = 0.0565, lr_0 = 1.9043e-04
Loss = 1.9031e-03, PNorm = 153.4421, GNorm = 0.2110, lr_0 = 1.9030e-04
Loss = 1.8921e-03, PNorm = 153.4434, GNorm = 0.1003, lr_0 = 1.9017e-04
Loss = 1.8090e-03, PNorm = 153.4444, GNorm = 0.1391, lr_0 = 1.9004e-04
Loss = 1.8422e-03, PNorm = 153.4485, GNorm = 0.1129, lr_0 = 1.8991e-04
Loss = 2.4986e-03, PNorm = 153.4506, GNorm = 0.1230, lr_0 = 1.8978e-04
Loss = 1.8485e-03, PNorm = 153.4538, GNorm = 0.0798, lr_0 = 1.8965e-04
Loss = 1.8051e-03, PNorm = 153.4571, GNorm = 0.0756, lr_0 = 1.8952e-04
Loss = 1.7663e-03, PNorm = 153.4599, GNorm = 0.2138, lr_0 = 1.8939e-04
Loss = 2.0341e-03, PNorm = 153.4633, GNorm = 0.0935, lr_0 = 1.8926e-04
Loss = 1.9817e-03, PNorm = 153.4682, GNorm = 0.1205, lr_0 = 1.8913e-04
Loss = 2.1936e-03, PNorm = 153.4710, GNorm = 0.1717, lr_0 = 1.8900e-04
Loss = 1.6228e-03, PNorm = 153.4732, GNorm = 0.0934, lr_0 = 1.8887e-04
Loss = 2.7015e-03, PNorm = 153.4764, GNorm = 0.1365, lr_0 = 1.8874e-04
Loss = 4.1979e-03, PNorm = 153.4773, GNorm = 0.2010, lr_0 = 1.8861e-04
Loss = 2.6293e-03, PNorm = 153.4818, GNorm = 0.2176, lr_0 = 1.8848e-04
Loss = 2.4022e-03, PNorm = 153.4862, GNorm = 0.1892, lr_0 = 1.8835e-04
Loss = 2.0383e-03, PNorm = 153.4929, GNorm = 0.1058, lr_0 = 1.8822e-04
Loss = 1.4256e-03, PNorm = 153.4971, GNorm = 0.0738, lr_0 = 1.8809e-04
Loss = 1.6021e-03, PNorm = 153.4984, GNorm = 0.1317, lr_0 = 1.8797e-04
Loss = 2.6011e-03, PNorm = 153.5009, GNorm = 0.1043, lr_0 = 1.8784e-04
Loss = 1.4043e-03, PNorm = 153.5047, GNorm = 0.1022, lr_0 = 1.8771e-04
Loss = 1.9226e-03, PNorm = 153.5079, GNorm = 0.1075, lr_0 = 1.8758e-04
Loss = 1.3464e-03, PNorm = 153.5097, GNorm = 0.1916, lr_0 = 1.8745e-04
Loss = 1.4749e-03, PNorm = 153.5112, GNorm = 0.1921, lr_0 = 1.8732e-04
Loss = 2.6890e-03, PNorm = 153.5152, GNorm = 0.1291, lr_0 = 1.8719e-04
Loss = 1.2286e-03, PNorm = 153.5208, GNorm = 0.2109, lr_0 = 1.8707e-04
Loss = 1.7609e-03, PNorm = 153.5243, GNorm = 0.2235, lr_0 = 1.8694e-04
Loss = 2.0265e-03, PNorm = 153.5269, GNorm = 0.0933, lr_0 = 1.8681e-04
Loss = 2.2769e-03, PNorm = 153.5298, GNorm = 0.0827, lr_0 = 1.8668e-04
Loss = 1.7673e-03, PNorm = 153.5325, GNorm = 0.1703, lr_0 = 1.8655e-04
Loss = 1.2339e-03, PNorm = 153.5340, GNorm = 0.1442, lr_0 = 1.8643e-04
Loss = 4.2807e-03, PNorm = 153.5375, GNorm = 0.1364, lr_0 = 1.8630e-04
Loss = 1.6673e-03, PNorm = 153.5401, GNorm = 0.0796, lr_0 = 1.8617e-04
Loss = 1.8926e-03, PNorm = 153.5443, GNorm = 0.1867, lr_0 = 1.8604e-04
Loss = 1.9905e-03, PNorm = 153.5487, GNorm = 0.1665, lr_0 = 1.8592e-04
Loss = 1.5239e-03, PNorm = 153.5529, GNorm = 0.0503, lr_0 = 1.8579e-04
Loss = 2.5085e-03, PNorm = 153.5581, GNorm = 0.1083, lr_0 = 1.8566e-04
Loss = 1.7242e-03, PNorm = 153.5628, GNorm = 0.0744, lr_0 = 1.8553e-04
Loss = 3.1077e-03, PNorm = 153.5658, GNorm = 0.2109, lr_0 = 1.8541e-04
Loss = 1.7899e-03, PNorm = 153.5687, GNorm = 0.1169, lr_0 = 1.8528e-04
Loss = 1.9525e-03, PNorm = 153.5709, GNorm = 0.0610, lr_0 = 1.8515e-04
Loss = 3.3065e-03, PNorm = 153.5748, GNorm = 0.6159, lr_0 = 1.8503e-04
Loss = 1.5859e-03, PNorm = 153.5740, GNorm = 0.0911, lr_0 = 1.8490e-04
Loss = 3.7294e-03, PNorm = 153.5778, GNorm = 0.1351, lr_0 = 1.8477e-04
Loss = 2.6304e-03, PNorm = 153.5867, GNorm = 0.2800, lr_0 = 1.8465e-04
Loss = 1.9215e-03, PNorm = 153.5921, GNorm = 0.1843, lr_0 = 1.8452e-04
Loss = 3.2668e-03, PNorm = 153.5970, GNorm = 0.1071, lr_0 = 1.8439e-04
Loss = 3.6772e-03, PNorm = 153.5976, GNorm = 0.0906, lr_0 = 1.8427e-04
Loss = 2.1774e-03, PNorm = 153.6011, GNorm = 0.4611, lr_0 = 1.8414e-04
Loss = 1.7438e-03, PNorm = 153.6011, GNorm = 0.2561, lr_0 = 1.8401e-04
Loss = 1.9797e-03, PNorm = 153.6037, GNorm = 0.1553, lr_0 = 1.8389e-04
Loss = 1.2065e-03, PNorm = 153.6066, GNorm = 0.1148, lr_0 = 1.8376e-04
Loss = 1.2896e-03, PNorm = 153.6097, GNorm = 0.0580, lr_0 = 1.8364e-04
Loss = 1.3964e-03, PNorm = 153.6122, GNorm = 0.0791, lr_0 = 1.8351e-04
Loss = 1.2992e-03, PNorm = 153.6165, GNorm = 0.0440, lr_0 = 1.8338e-04
Loss = 2.9253e-03, PNorm = 153.6206, GNorm = 0.1690, lr_0 = 1.8326e-04
Loss = 2.7259e-03, PNorm = 153.6227, GNorm = 0.1463, lr_0 = 1.8313e-04
Loss = 4.2672e-03, PNorm = 153.6264, GNorm = 0.1241, lr_0 = 1.8301e-04
Loss = 1.7198e-03, PNorm = 153.6313, GNorm = 0.2464, lr_0 = 1.8288e-04
Loss = 1.8378e-03, PNorm = 153.6333, GNorm = 0.3191, lr_0 = 1.8276e-04
Loss = 2.0899e-03, PNorm = 153.6361, GNorm = 0.1647, lr_0 = 1.8263e-04
Loss = 1.3153e-03, PNorm = 153.6382, GNorm = 0.2562, lr_0 = 1.8251e-04
Loss = 2.1458e-03, PNorm = 153.6451, GNorm = 0.1366, lr_0 = 1.8238e-04
Loss = 2.4124e-03, PNorm = 153.6505, GNorm = 0.0974, lr_0 = 1.8226e-04
Loss = 1.5250e-03, PNorm = 153.6538, GNorm = 0.1451, lr_0 = 1.8213e-04
Loss = 3.2186e-03, PNorm = 153.6587, GNorm = 0.1382, lr_0 = 1.8201e-04
Loss = 1.8496e-03, PNorm = 153.6632, GNorm = 0.1443, lr_0 = 1.8188e-04
Loss = 2.5506e-03, PNorm = 153.6663, GNorm = 0.0609, lr_0 = 1.8176e-04
Loss = 2.7020e-03, PNorm = 153.6707, GNorm = 0.2143, lr_0 = 1.8163e-04
Loss = 3.1705e-03, PNorm = 153.6738, GNorm = 0.2532, lr_0 = 1.8151e-04
Loss = 1.6069e-03, PNorm = 153.6799, GNorm = 0.1184, lr_0 = 1.8138e-04
Loss = 1.6185e-03, PNorm = 153.6836, GNorm = 0.1800, lr_0 = 1.8126e-04
Loss = 3.1263e-03, PNorm = 153.6876, GNorm = 0.1288, lr_0 = 1.8114e-04
Loss = 1.7906e-03, PNorm = 153.6894, GNorm = 0.1430, lr_0 = 1.8101e-04
Loss = 1.4442e-03, PNorm = 153.6925, GNorm = 0.0648, lr_0 = 1.8089e-04
Loss = 1.5546e-03, PNorm = 153.6942, GNorm = 0.1577, lr_0 = 1.8076e-04
Loss = 2.4036e-03, PNorm = 153.6972, GNorm = 0.1060, lr_0 = 1.8064e-04
Loss = 1.4710e-03, PNorm = 153.7021, GNorm = 0.1128, lr_0 = 1.8052e-04
Loss = 1.4927e-03, PNorm = 153.7076, GNorm = 0.0659, lr_0 = 1.8039e-04
Loss = 4.5160e-03, PNorm = 153.7097, GNorm = 0.1408, lr_0 = 1.8027e-04
Loss = 2.1934e-03, PNorm = 153.7097, GNorm = 0.1129, lr_0 = 1.8015e-04
Loss = 3.7269e-03, PNorm = 153.7109, GNorm = 0.4510, lr_0 = 1.8002e-04
Loss = 1.5972e-03, PNorm = 153.7124, GNorm = 0.2814, lr_0 = 1.7990e-04
Loss = 2.3033e-03, PNorm = 153.7151, GNorm = 0.1692, lr_0 = 1.7978e-04
Loss = 1.2474e-03, PNorm = 153.7190, GNorm = 0.1546, lr_0 = 1.7965e-04
Loss = 2.3554e-03, PNorm = 153.7217, GNorm = 0.2500, lr_0 = 1.7953e-04
Loss = 1.9189e-03, PNorm = 153.7282, GNorm = 0.1619, lr_0 = 1.7941e-04
Loss = 1.6732e-03, PNorm = 153.7306, GNorm = 0.1167, lr_0 = 1.7928e-04
Loss = 2.9064e-03, PNorm = 153.7342, GNorm = 0.1288, lr_0 = 1.7916e-04
Loss = 1.4365e-03, PNorm = 153.7358, GNorm = 0.1290, lr_0 = 1.7904e-04
Loss = 2.1073e-03, PNorm = 153.7414, GNorm = 0.1115, lr_0 = 1.7892e-04
Loss = 1.6544e-03, PNorm = 153.7471, GNorm = 0.1395, lr_0 = 1.7879e-04
Loss = 1.2842e-03, PNorm = 153.7498, GNorm = 0.0568, lr_0 = 1.7867e-04
Loss = 1.4828e-03, PNorm = 153.7541, GNorm = 0.1426, lr_0 = 1.7855e-04
Loss = 2.2492e-03, PNorm = 153.7586, GNorm = 0.1653, lr_0 = 1.7843e-04
Loss = 1.6603e-03, PNorm = 153.7618, GNorm = 0.1897, lr_0 = 1.7830e-04
Loss = 1.8850e-03, PNorm = 153.7643, GNorm = 0.1705, lr_0 = 1.7818e-04
Loss = 1.9329e-03, PNorm = 153.7685, GNorm = 0.0753, lr_0 = 1.7806e-04
Loss = 1.7250e-03, PNorm = 153.7720, GNorm = 0.2302, lr_0 = 1.7794e-04
Loss = 1.5715e-03, PNorm = 153.7778, GNorm = 0.0749, lr_0 = 1.7782e-04
Validation mae = 0.476165
Epoch 23
Loss = 1.4359e-03, PNorm = 153.7820, GNorm = 0.1668, lr_0 = 1.7769e-04
Loss = 1.3242e-03, PNorm = 153.7821, GNorm = 0.1469, lr_0 = 1.7757e-04
Loss = 1.2546e-03, PNorm = 153.7843, GNorm = 0.0365, lr_0 = 1.7745e-04
Loss = 2.6441e-03, PNorm = 153.7863, GNorm = 0.0525, lr_0 = 1.7733e-04
Loss = 2.6909e-03, PNorm = 153.7888, GNorm = 0.2433, lr_0 = 1.7721e-04
Loss = 1.6020e-03, PNorm = 153.7912, GNorm = 0.0890, lr_0 = 1.7709e-04
Loss = 1.2181e-03, PNorm = 153.7949, GNorm = 0.1134, lr_0 = 1.7696e-04
Loss = 1.2634e-03, PNorm = 153.7984, GNorm = 0.0734, lr_0 = 1.7684e-04
Loss = 1.4608e-03, PNorm = 153.7996, GNorm = 0.1334, lr_0 = 1.7672e-04
Loss = 1.2420e-03, PNorm = 153.8016, GNorm = 0.1036, lr_0 = 1.7660e-04
Loss = 1.1940e-03, PNorm = 153.8045, GNorm = 0.0450, lr_0 = 1.7648e-04
Loss = 1.8561e-03, PNorm = 153.8076, GNorm = 0.0688, lr_0 = 1.7636e-04
Loss = 1.8353e-03, PNorm = 153.8108, GNorm = 0.1067, lr_0 = 1.7624e-04
Loss = 5.3597e-03, PNorm = 153.8124, GNorm = 0.1157, lr_0 = 1.7612e-04
Loss = 1.2147e-03, PNorm = 153.8135, GNorm = 0.0772, lr_0 = 1.7600e-04
Loss = 1.7100e-03, PNorm = 153.8150, GNorm = 0.0746, lr_0 = 1.7588e-04
Loss = 1.2780e-03, PNorm = 153.8176, GNorm = 0.2082, lr_0 = 1.7576e-04
Loss = 1.3380e-03, PNorm = 153.8220, GNorm = 0.1456, lr_0 = 1.7564e-04
Loss = 1.3764e-03, PNorm = 153.8255, GNorm = 0.0497, lr_0 = 1.7552e-04
Loss = 1.2645e-03, PNorm = 153.8257, GNorm = 0.1627, lr_0 = 1.7540e-04
Loss = 1.3183e-03, PNorm = 153.8280, GNorm = 0.0589, lr_0 = 1.7528e-04
Loss = 1.2921e-03, PNorm = 153.8299, GNorm = 0.0766, lr_0 = 1.7516e-04
Loss = 4.1510e-03, PNorm = 153.8293, GNorm = 0.1533, lr_0 = 1.7504e-04
Loss = 1.7829e-03, PNorm = 153.8318, GNorm = 0.2031, lr_0 = 1.7492e-04
Loss = 1.4641e-03, PNorm = 153.8347, GNorm = 0.2389, lr_0 = 1.7480e-04
Loss = 2.5211e-03, PNorm = 153.8419, GNorm = 0.1615, lr_0 = 1.7468e-04
Loss = 3.2407e-03, PNorm = 153.8469, GNorm = 0.1208, lr_0 = 1.7456e-04
Loss = 1.7300e-03, PNorm = 153.8512, GNorm = 0.1096, lr_0 = 1.7444e-04
Loss = 1.5773e-03, PNorm = 153.8569, GNorm = 0.0440, lr_0 = 1.7432e-04
Loss = 1.9812e-03, PNorm = 153.8595, GNorm = 0.1392, lr_0 = 1.7420e-04
Loss = 1.9642e-03, PNorm = 153.8625, GNorm = 0.0823, lr_0 = 1.7408e-04
Loss = 2.2441e-03, PNorm = 153.8668, GNorm = 0.0873, lr_0 = 1.7396e-04
Loss = 2.7689e-03, PNorm = 153.8701, GNorm = 0.0622, lr_0 = 1.7384e-04
Loss = 1.8914e-03, PNorm = 153.8733, GNorm = 0.1630, lr_0 = 1.7372e-04
Loss = 2.5099e-03, PNorm = 153.8772, GNorm = 0.0726, lr_0 = 1.7360e-04
Loss = 1.4899e-03, PNorm = 153.8801, GNorm = 0.0602, lr_0 = 1.7348e-04
Loss = 1.1376e-03, PNorm = 153.8823, GNorm = 0.1171, lr_0 = 1.7336e-04
Loss = 3.1727e-03, PNorm = 153.8851, GNorm = 0.1388, lr_0 = 1.7325e-04
Loss = 1.1542e-03, PNorm = 153.8891, GNorm = 0.1210, lr_0 = 1.7313e-04
Loss = 1.1481e-03, PNorm = 153.8938, GNorm = 0.1217, lr_0 = 1.7301e-04
Loss = 1.8144e-03, PNorm = 153.8969, GNorm = 0.1974, lr_0 = 1.7289e-04
Loss = 1.2474e-03, PNorm = 153.8997, GNorm = 0.0243, lr_0 = 1.7277e-04
Loss = 1.4911e-03, PNorm = 153.9016, GNorm = 0.0878, lr_0 = 1.7265e-04
Loss = 2.0719e-03, PNorm = 153.9034, GNorm = 0.1058, lr_0 = 1.7253e-04
Loss = 3.1621e-03, PNorm = 153.9054, GNorm = 0.0876, lr_0 = 1.7242e-04
Loss = 1.7029e-03, PNorm = 153.9083, GNorm = 0.1254, lr_0 = 1.7230e-04
Loss = 1.8884e-03, PNorm = 153.9120, GNorm = 0.0673, lr_0 = 1.7218e-04
Loss = 1.4399e-03, PNorm = 153.9135, GNorm = 0.1704, lr_0 = 1.7206e-04
Loss = 1.8261e-03, PNorm = 153.9145, GNorm = 0.1017, lr_0 = 1.7194e-04
Loss = 4.0284e-03, PNorm = 153.9154, GNorm = 0.1156, lr_0 = 1.7183e-04
Loss = 2.3865e-03, PNorm = 153.9149, GNorm = 0.4668, lr_0 = 1.7171e-04
Loss = 1.4043e-03, PNorm = 153.9185, GNorm = 0.0796, lr_0 = 1.7159e-04
Loss = 3.1447e-03, PNorm = 153.9207, GNorm = 0.2617, lr_0 = 1.7147e-04
Loss = 1.0831e-03, PNorm = 153.9250, GNorm = 0.1329, lr_0 = 1.7136e-04
Loss = 1.9705e-03, PNorm = 153.9278, GNorm = 0.2850, lr_0 = 1.7124e-04
Loss = 1.3973e-03, PNorm = 153.9302, GNorm = 0.2699, lr_0 = 1.7112e-04
Loss = 1.3812e-03, PNorm = 153.9334, GNorm = 0.1769, lr_0 = 1.7100e-04
Loss = 3.0446e-03, PNorm = 153.9355, GNorm = 0.0758, lr_0 = 1.7089e-04
Loss = 2.0608e-03, PNorm = 153.9402, GNorm = 0.1102, lr_0 = 1.7077e-04
Loss = 2.3511e-03, PNorm = 153.9447, GNorm = 0.1594, lr_0 = 1.7065e-04
Loss = 1.5204e-03, PNorm = 153.9485, GNorm = 0.2183, lr_0 = 1.7054e-04
Loss = 1.4102e-03, PNorm = 153.9503, GNorm = 0.0700, lr_0 = 1.7042e-04
Loss = 1.0875e-03, PNorm = 153.9521, GNorm = 0.0551, lr_0 = 1.7030e-04
Loss = 1.5613e-03, PNorm = 153.9537, GNorm = 0.1361, lr_0 = 1.7019e-04
Loss = 1.1720e-03, PNorm = 153.9576, GNorm = 0.0956, lr_0 = 1.7007e-04
Loss = 1.3506e-03, PNorm = 153.9628, GNorm = 0.0809, lr_0 = 1.6995e-04
Loss = 1.5941e-03, PNorm = 153.9661, GNorm = 0.0873, lr_0 = 1.6984e-04
Loss = 4.3738e-03, PNorm = 153.9683, GNorm = 0.0691, lr_0 = 1.6972e-04
Loss = 1.4631e-03, PNorm = 153.9703, GNorm = 0.0918, lr_0 = 1.6960e-04
Loss = 1.2096e-03, PNorm = 153.9721, GNorm = 0.1729, lr_0 = 1.6949e-04
Loss = 1.6563e-03, PNorm = 153.9734, GNorm = 0.1606, lr_0 = 1.6937e-04
Loss = 1.4091e-03, PNorm = 153.9755, GNorm = 0.0779, lr_0 = 1.6926e-04
Loss = 1.1271e-03, PNorm = 153.9802, GNorm = 0.0830, lr_0 = 1.6914e-04
Loss = 1.7913e-03, PNorm = 153.9856, GNorm = 0.0463, lr_0 = 1.6902e-04
Loss = 2.9856e-03, PNorm = 153.9894, GNorm = 0.0754, lr_0 = 1.6891e-04
Loss = 2.4853e-03, PNorm = 153.9930, GNorm = 0.1068, lr_0 = 1.6879e-04
Loss = 1.6680e-03, PNorm = 153.9985, GNorm = 0.0603, lr_0 = 1.6868e-04
Loss = 2.4904e-03, PNorm = 154.0017, GNorm = 0.1751, lr_0 = 1.6856e-04
Loss = 1.2668e-03, PNorm = 154.0062, GNorm = 0.1180, lr_0 = 1.6845e-04
Loss = 2.2385e-03, PNorm = 154.0088, GNorm = 0.0767, lr_0 = 1.6833e-04
Loss = 1.0229e-03, PNorm = 154.0120, GNorm = 0.1123, lr_0 = 1.6821e-04
Loss = 1.5116e-03, PNorm = 154.0118, GNorm = 0.1277, lr_0 = 1.6810e-04
Loss = 2.4475e-03, PNorm = 154.0129, GNorm = 0.1650, lr_0 = 1.6798e-04
Loss = 1.1416e-03, PNorm = 154.0160, GNorm = 0.0714, lr_0 = 1.6787e-04
Loss = 1.4077e-03, PNorm = 154.0163, GNorm = 0.0621, lr_0 = 1.6775e-04
Loss = 4.2955e-03, PNorm = 154.0178, GNorm = 0.1577, lr_0 = 1.6764e-04
Loss = 2.0966e-03, PNorm = 154.0203, GNorm = 0.0985, lr_0 = 1.6752e-04
Loss = 1.6575e-03, PNorm = 154.0232, GNorm = 0.3333, lr_0 = 1.6741e-04
Loss = 2.3408e-03, PNorm = 154.0292, GNorm = 0.1532, lr_0 = 1.6729e-04
Loss = 2.2306e-03, PNorm = 154.0322, GNorm = 0.0811, lr_0 = 1.6718e-04
Loss = 1.2780e-03, PNorm = 154.0341, GNorm = 0.0998, lr_0 = 1.6707e-04
Loss = 2.1577e-03, PNorm = 154.0384, GNorm = 0.1545, lr_0 = 1.6695e-04
Loss = 1.8687e-03, PNorm = 154.0445, GNorm = 0.1199, lr_0 = 1.6684e-04
Loss = 1.0264e-03, PNorm = 154.0484, GNorm = 0.1640, lr_0 = 1.6672e-04
Loss = 1.2454e-03, PNorm = 154.0514, GNorm = 0.1223, lr_0 = 1.6661e-04
Loss = 2.6759e-03, PNorm = 154.0570, GNorm = 0.1465, lr_0 = 1.6649e-04
Loss = 1.5637e-03, PNorm = 154.0592, GNorm = 0.1501, lr_0 = 1.6638e-04
Loss = 1.3094e-03, PNorm = 154.0620, GNorm = 0.0604, lr_0 = 1.6627e-04
Loss = 1.1415e-03, PNorm = 154.0639, GNorm = 0.0860, lr_0 = 1.6615e-04
Loss = 2.2605e-03, PNorm = 154.0662, GNorm = 0.1445, lr_0 = 1.6604e-04
Loss = 1.3787e-03, PNorm = 154.0711, GNorm = 0.0764, lr_0 = 1.6592e-04
Loss = 1.4774e-03, PNorm = 154.0739, GNorm = 0.0766, lr_0 = 1.6581e-04
Loss = 1.0726e-03, PNorm = 154.0790, GNorm = 0.1201, lr_0 = 1.6570e-04
Loss = 1.8357e-03, PNorm = 154.0816, GNorm = 0.0825, lr_0 = 1.6558e-04
Loss = 3.7203e-03, PNorm = 154.0850, GNorm = 0.1287, lr_0 = 1.6547e-04
Loss = 1.6975e-03, PNorm = 154.0878, GNorm = 0.1722, lr_0 = 1.6536e-04
Loss = 1.3924e-03, PNorm = 154.0917, GNorm = 0.0831, lr_0 = 1.6524e-04
Loss = 2.5475e-03, PNorm = 154.0952, GNorm = 0.0834, lr_0 = 1.6513e-04
Loss = 1.8687e-03, PNorm = 154.0973, GNorm = 0.0701, lr_0 = 1.6502e-04
Loss = 3.3014e-03, PNorm = 154.0968, GNorm = 0.4857, lr_0 = 1.6490e-04
Loss = 2.6775e-03, PNorm = 154.0965, GNorm = 0.0627, lr_0 = 1.6479e-04
Loss = 2.0755e-03, PNorm = 154.0982, GNorm = 0.0682, lr_0 = 1.6468e-04
Loss = 1.1964e-03, PNorm = 154.1006, GNorm = 0.0428, lr_0 = 1.6457e-04
Loss = 1.2341e-03, PNorm = 154.1035, GNorm = 0.0983, lr_0 = 1.6445e-04
Loss = 1.6390e-03, PNorm = 154.1051, GNorm = 0.2278, lr_0 = 1.6434e-04
Loss = 1.3236e-03, PNorm = 154.1074, GNorm = 0.1216, lr_0 = 1.6423e-04
Loss = 2.9906e-03, PNorm = 154.1100, GNorm = 0.0894, lr_0 = 1.6412e-04
Loss = 1.8140e-03, PNorm = 154.1133, GNorm = 0.0359, lr_0 = 1.6400e-04
Loss = 9.8311e-04, PNorm = 154.1166, GNorm = 0.0532, lr_0 = 1.6389e-04
Loss = 1.3349e-03, PNorm = 154.1198, GNorm = 0.0333, lr_0 = 1.6378e-04
Validation mae = 0.475427
Epoch 24
Loss = 1.5627e-03, PNorm = 154.1219, GNorm = 0.1618, lr_0 = 1.6367e-04
Loss = 1.1196e-03, PNorm = 154.1231, GNorm = 0.2072, lr_0 = 1.6355e-04
Loss = 2.3368e-03, PNorm = 154.1241, GNorm = 0.1727, lr_0 = 1.6344e-04
Loss = 1.2539e-03, PNorm = 154.1272, GNorm = 0.0656, lr_0 = 1.6333e-04
Loss = 1.5364e-03, PNorm = 154.1313, GNorm = 0.1521, lr_0 = 1.6322e-04
Loss = 1.3740e-03, PNorm = 154.1343, GNorm = 0.2817, lr_0 = 1.6311e-04
Loss = 1.9642e-03, PNorm = 154.1363, GNorm = 0.1374, lr_0 = 1.6299e-04
Loss = 1.1627e-03, PNorm = 154.1369, GNorm = 0.2053, lr_0 = 1.6288e-04
Loss = 1.6937e-03, PNorm = 154.1404, GNorm = 0.0606, lr_0 = 1.6277e-04
Loss = 2.5722e-03, PNorm = 154.1433, GNorm = 0.2377, lr_0 = 1.6266e-04
Loss = 2.4197e-03, PNorm = 154.1449, GNorm = 0.1282, lr_0 = 1.6255e-04
Loss = 9.0384e-04, PNorm = 154.1477, GNorm = 0.1081, lr_0 = 1.6244e-04
Loss = 1.4814e-03, PNorm = 154.1510, GNorm = 0.1204, lr_0 = 1.6233e-04
Loss = 1.3147e-03, PNorm = 154.1539, GNorm = 0.1839, lr_0 = 1.6221e-04
Loss = 1.1724e-03, PNorm = 154.1576, GNorm = 0.2542, lr_0 = 1.6210e-04
Loss = 1.0640e-03, PNorm = 154.1592, GNorm = 0.1266, lr_0 = 1.6199e-04
Loss = 1.3357e-03, PNorm = 154.1614, GNorm = 0.0723, lr_0 = 1.6188e-04
Loss = 1.0471e-03, PNorm = 154.1633, GNorm = 0.0925, lr_0 = 1.6177e-04
Loss = 9.0485e-04, PNorm = 154.1641, GNorm = 0.0756, lr_0 = 1.6166e-04
Loss = 9.6272e-04, PNorm = 154.1655, GNorm = 0.0847, lr_0 = 1.6155e-04
Loss = 9.1975e-04, PNorm = 154.1702, GNorm = 0.1454, lr_0 = 1.6144e-04
Loss = 1.8673e-03, PNorm = 154.1719, GNorm = 0.1149, lr_0 = 1.6133e-04
Loss = 1.3560e-03, PNorm = 154.1751, GNorm = 0.0517, lr_0 = 1.6122e-04
Loss = 9.9333e-04, PNorm = 154.1770, GNorm = 0.0389, lr_0 = 1.6111e-04
Loss = 1.1589e-03, PNorm = 154.1802, GNorm = 0.0305, lr_0 = 1.6100e-04
Loss = 9.8532e-04, PNorm = 154.1829, GNorm = 0.1093, lr_0 = 1.6089e-04
Loss = 1.0739e-03, PNorm = 154.1862, GNorm = 0.2186, lr_0 = 1.6078e-04
Loss = 1.2063e-03, PNorm = 154.1892, GNorm = 0.0908, lr_0 = 1.6067e-04
Loss = 2.0214e-03, PNorm = 154.1910, GNorm = 0.1476, lr_0 = 1.6056e-04
Loss = 1.2551e-03, PNorm = 154.1905, GNorm = 0.0779, lr_0 = 1.6045e-04
Loss = 2.8984e-03, PNorm = 154.1929, GNorm = 0.1766, lr_0 = 1.6034e-04
Loss = 1.2882e-03, PNorm = 154.1938, GNorm = 0.0659, lr_0 = 1.6023e-04
Loss = 9.7722e-04, PNorm = 154.1960, GNorm = 0.0579, lr_0 = 1.6012e-04
Loss = 2.2797e-03, PNorm = 154.1985, GNorm = 0.0677, lr_0 = 1.6001e-04
Loss = 1.1244e-03, PNorm = 154.1990, GNorm = 0.0435, lr_0 = 1.5990e-04
Loss = 2.1281e-03, PNorm = 154.2001, GNorm = 0.0733, lr_0 = 1.5979e-04
Loss = 3.7980e-03, PNorm = 154.2029, GNorm = 0.0726, lr_0 = 1.5968e-04
Loss = 2.5607e-03, PNorm = 154.2040, GNorm = 0.1129, lr_0 = 1.5957e-04
Loss = 9.4765e-04, PNorm = 154.2057, GNorm = 0.1709, lr_0 = 1.5946e-04
Loss = 1.3458e-03, PNorm = 154.2074, GNorm = 0.1168, lr_0 = 1.5935e-04
Loss = 9.4130e-04, PNorm = 154.2120, GNorm = 0.0475, lr_0 = 1.5924e-04
Loss = 1.0576e-03, PNorm = 154.2143, GNorm = 0.1293, lr_0 = 1.5913e-04
Loss = 1.6900e-03, PNorm = 154.2179, GNorm = 0.1939, lr_0 = 1.5902e-04
Loss = 1.5944e-03, PNorm = 154.2197, GNorm = 0.0478, lr_0 = 1.5891e-04
Loss = 1.9847e-03, PNorm = 154.2221, GNorm = 0.0709, lr_0 = 1.5880e-04
Loss = 1.0096e-03, PNorm = 154.2243, GNorm = 0.0408, lr_0 = 1.5870e-04
Loss = 1.7173e-03, PNorm = 154.2258, GNorm = 0.1109, lr_0 = 1.5859e-04
Loss = 1.3667e-03, PNorm = 154.2281, GNorm = 0.1133, lr_0 = 1.5848e-04
Loss = 1.1668e-03, PNorm = 154.2325, GNorm = 0.0285, lr_0 = 1.5837e-04
Loss = 1.6049e-03, PNorm = 154.2352, GNorm = 0.2558, lr_0 = 1.5826e-04
Loss = 2.0685e-03, PNorm = 154.2397, GNorm = 0.1036, lr_0 = 1.5815e-04
Loss = 8.0398e-04, PNorm = 154.2418, GNorm = 0.0446, lr_0 = 1.5804e-04
Loss = 8.9842e-04, PNorm = 154.2447, GNorm = 0.0582, lr_0 = 1.5794e-04
Loss = 2.5275e-03, PNorm = 154.2486, GNorm = 0.1479, lr_0 = 1.5783e-04
Loss = 1.8686e-03, PNorm = 154.2515, GNorm = 0.1448, lr_0 = 1.5772e-04
Loss = 9.7025e-04, PNorm = 154.2552, GNorm = 0.1303, lr_0 = 1.5761e-04
Loss = 1.0893e-03, PNorm = 154.2559, GNorm = 0.0791, lr_0 = 1.5750e-04
Loss = 2.6310e-03, PNorm = 154.2566, GNorm = 0.2669, lr_0 = 1.5740e-04
Loss = 9.3702e-04, PNorm = 154.2568, GNorm = 0.0852, lr_0 = 1.5729e-04
Loss = 2.2328e-03, PNorm = 154.2578, GNorm = 0.0772, lr_0 = 1.5718e-04
Loss = 1.2481e-03, PNorm = 154.2586, GNorm = 0.0346, lr_0 = 1.5707e-04
Loss = 1.1248e-03, PNorm = 154.2600, GNorm = 0.1195, lr_0 = 1.5697e-04
Loss = 1.6477e-03, PNorm = 154.2622, GNorm = 0.0719, lr_0 = 1.5686e-04
Loss = 1.9344e-03, PNorm = 154.2653, GNorm = 0.1160, lr_0 = 1.5675e-04
Loss = 1.6433e-03, PNorm = 154.2670, GNorm = 0.0980, lr_0 = 1.5664e-04
Loss = 2.9704e-03, PNorm = 154.2686, GNorm = 0.1753, lr_0 = 1.5654e-04
Loss = 1.0267e-03, PNorm = 154.2698, GNorm = 0.2765, lr_0 = 1.5643e-04
Loss = 1.2267e-03, PNorm = 154.2717, GNorm = 0.0750, lr_0 = 1.5632e-04
Loss = 1.6079e-03, PNorm = 154.2751, GNorm = 0.0979, lr_0 = 1.5621e-04
Loss = 3.8557e-03, PNorm = 154.2779, GNorm = 0.0807, lr_0 = 1.5611e-04
Loss = 1.5938e-03, PNorm = 154.2807, GNorm = 0.0817, lr_0 = 1.5600e-04
Loss = 1.3307e-03, PNorm = 154.2843, GNorm = 0.1140, lr_0 = 1.5589e-04
Loss = 1.3621e-03, PNorm = 154.2864, GNorm = 0.0449, lr_0 = 1.5579e-04
Loss = 4.7107e-03, PNorm = 154.2869, GNorm = 0.3131, lr_0 = 1.5568e-04
Loss = 1.5500e-03, PNorm = 154.2894, GNorm = 0.0754, lr_0 = 1.5557e-04
Loss = 1.1392e-03, PNorm = 154.2922, GNorm = 0.0700, lr_0 = 1.5547e-04
Loss = 9.2683e-04, PNorm = 154.2971, GNorm = 0.0722, lr_0 = 1.5536e-04
Loss = 1.5105e-03, PNorm = 154.3010, GNorm = 0.0625, lr_0 = 1.5525e-04
Loss = 4.4826e-03, PNorm = 154.3048, GNorm = 0.0427, lr_0 = 1.5515e-04
Loss = 1.6649e-03, PNorm = 154.3060, GNorm = 0.2057, lr_0 = 1.5504e-04
Loss = 1.6558e-03, PNorm = 154.3094, GNorm = 0.1306, lr_0 = 1.5493e-04
Loss = 1.0332e-03, PNorm = 154.3118, GNorm = 0.1260, lr_0 = 1.5483e-04
Loss = 9.9696e-04, PNorm = 154.3164, GNorm = 0.1824, lr_0 = 1.5472e-04
Loss = 1.4054e-03, PNorm = 154.3200, GNorm = 0.0399, lr_0 = 1.5462e-04
Loss = 1.7438e-03, PNorm = 154.3208, GNorm = 0.2679, lr_0 = 1.5451e-04
Loss = 9.8754e-04, PNorm = 154.3222, GNorm = 0.1337, lr_0 = 1.5440e-04
Loss = 9.7207e-04, PNorm = 154.3242, GNorm = 0.0816, lr_0 = 1.5430e-04
Loss = 1.2761e-03, PNorm = 154.3262, GNorm = 0.2061, lr_0 = 1.5419e-04
Loss = 1.0959e-03, PNorm = 154.3280, GNorm = 0.1293, lr_0 = 1.5409e-04
Loss = 1.6793e-03, PNorm = 154.3301, GNorm = 0.0633, lr_0 = 1.5398e-04
Loss = 1.6442e-03, PNorm = 154.3320, GNorm = 0.0808, lr_0 = 1.5388e-04
Loss = 3.2522e-03, PNorm = 154.3348, GNorm = 0.1082, lr_0 = 1.5377e-04
Loss = 1.8015e-03, PNorm = 154.3365, GNorm = 0.0665, lr_0 = 1.5367e-04
Loss = 1.3617e-03, PNorm = 154.3397, GNorm = 0.1238, lr_0 = 1.5356e-04
Loss = 1.2252e-03, PNorm = 154.3432, GNorm = 0.1186, lr_0 = 1.5346e-04
Loss = 2.5986e-03, PNorm = 154.3467, GNorm = 0.0687, lr_0 = 1.5335e-04
Loss = 1.5713e-03, PNorm = 154.3488, GNorm = 0.0850, lr_0 = 1.5325e-04
Loss = 1.0275e-03, PNorm = 154.3509, GNorm = 0.0503, lr_0 = 1.5314e-04
Loss = 2.7261e-03, PNorm = 154.3525, GNorm = 0.1220, lr_0 = 1.5304e-04
Loss = 1.1194e-03, PNorm = 154.3542, GNorm = 0.1091, lr_0 = 1.5293e-04
Loss = 1.8636e-03, PNorm = 154.3564, GNorm = 0.0443, lr_0 = 1.5283e-04
Loss = 2.8137e-03, PNorm = 154.3604, GNorm = 0.0269, lr_0 = 1.5272e-04
Loss = 8.2958e-04, PNorm = 154.3636, GNorm = 0.1253, lr_0 = 1.5262e-04
Loss = 1.6792e-03, PNorm = 154.3668, GNorm = 0.0745, lr_0 = 1.5251e-04
Loss = 2.3844e-03, PNorm = 154.3699, GNorm = 0.4470, lr_0 = 1.5241e-04
Loss = 1.2747e-03, PNorm = 154.3723, GNorm = 0.2164, lr_0 = 1.5230e-04
Loss = 1.5594e-03, PNorm = 154.3752, GNorm = 0.0808, lr_0 = 1.5220e-04
Loss = 1.0119e-03, PNorm = 154.3770, GNorm = 0.0459, lr_0 = 1.5209e-04
Loss = 2.7598e-03, PNorm = 154.3775, GNorm = 0.1730, lr_0 = 1.5199e-04
Loss = 2.6205e-03, PNorm = 154.3825, GNorm = 0.1575, lr_0 = 1.5189e-04
Loss = 2.2398e-03, PNorm = 154.3866, GNorm = 0.2687, lr_0 = 1.5178e-04
Loss = 2.2760e-03, PNorm = 154.3898, GNorm = 0.0487, lr_0 = 1.5168e-04
Loss = 3.9027e-03, PNorm = 154.3927, GNorm = 0.1081, lr_0 = 1.5157e-04
Loss = 3.5195e-03, PNorm = 154.3943, GNorm = 0.0428, lr_0 = 1.5147e-04
Loss = 1.0698e-03, PNorm = 154.3944, GNorm = 0.2410, lr_0 = 1.5137e-04
Loss = 1.3203e-03, PNorm = 154.3962, GNorm = 0.0957, lr_0 = 1.5126e-04
Loss = 3.0450e-03, PNorm = 154.3999, GNorm = 0.1432, lr_0 = 1.5116e-04
Loss = 1.1985e-03, PNorm = 154.4006, GNorm = 0.1176, lr_0 = 1.5106e-04
Loss = 1.0127e-03, PNorm = 154.4016, GNorm = 0.0369, lr_0 = 1.5095e-04
Loss = 1.2733e-03, PNorm = 154.4033, GNorm = 0.0285, lr_0 = 1.5085e-04
Validation mae = 0.475916
Epoch 25
Loss = 2.4344e-03, PNorm = 154.4045, GNorm = 0.1657, lr_0 = 1.5075e-04
Loss = 2.2014e-03, PNorm = 154.4088, GNorm = 0.0366, lr_0 = 1.5064e-04
Loss = 1.2186e-03, PNorm = 154.4105, GNorm = 0.0631, lr_0 = 1.5054e-04
Loss = 1.3161e-03, PNorm = 154.4114, GNorm = 0.0272, lr_0 = 1.5044e-04
Loss = 1.3808e-03, PNorm = 154.4131, GNorm = 0.2325, lr_0 = 1.5033e-04
Loss = 1.6337e-03, PNorm = 154.4155, GNorm = 0.0723, lr_0 = 1.5023e-04
Loss = 1.2564e-03, PNorm = 154.4172, GNorm = 0.1797, lr_0 = 1.5013e-04
Loss = 1.0659e-03, PNorm = 154.4189, GNorm = 0.0930, lr_0 = 1.5002e-04
Loss = 3.8519e-03, PNorm = 154.4210, GNorm = 0.2369, lr_0 = 1.4992e-04
Loss = 1.0179e-03, PNorm = 154.4246, GNorm = 0.0292, lr_0 = 1.4982e-04
Loss = 8.1873e-04, PNorm = 154.4267, GNorm = 0.0585, lr_0 = 1.4972e-04
Loss = 7.3900e-04, PNorm = 154.4293, GNorm = 0.0328, lr_0 = 1.4961e-04
Loss = 1.1263e-03, PNorm = 154.4305, GNorm = 0.0811, lr_0 = 1.4951e-04
Loss = 7.4006e-04, PNorm = 154.4312, GNorm = 0.0737, lr_0 = 1.4941e-04
Loss = 2.8985e-03, PNorm = 154.4336, GNorm = 0.1826, lr_0 = 1.4931e-04
Loss = 1.6958e-03, PNorm = 154.4350, GNorm = 0.0344, lr_0 = 1.4920e-04
Loss = 1.9551e-03, PNorm = 154.4378, GNorm = 0.0763, lr_0 = 1.4910e-04
Loss = 1.6223e-03, PNorm = 154.4400, GNorm = 0.0732, lr_0 = 1.4900e-04
Loss = 1.4833e-03, PNorm = 154.4424, GNorm = 0.0930, lr_0 = 1.4890e-04
Loss = 1.2437e-03, PNorm = 154.4444, GNorm = 0.1009, lr_0 = 1.4880e-04
Loss = 2.4570e-03, PNorm = 154.4471, GNorm = 0.1896, lr_0 = 1.4869e-04
Loss = 1.3167e-03, PNorm = 154.4490, GNorm = 0.0744, lr_0 = 1.4859e-04
Loss = 1.3325e-03, PNorm = 154.4512, GNorm = 0.0790, lr_0 = 1.4849e-04
Loss = 1.4551e-03, PNorm = 154.4527, GNorm = 0.0556, lr_0 = 1.4839e-04
Loss = 2.4184e-03, PNorm = 154.4545, GNorm = 0.0684, lr_0 = 1.4829e-04
Loss = 1.4253e-03, PNorm = 154.4556, GNorm = 0.3712, lr_0 = 1.4818e-04
Loss = 9.4425e-04, PNorm = 154.4572, GNorm = 0.2588, lr_0 = 1.4808e-04
Loss = 1.0496e-03, PNorm = 154.4611, GNorm = 0.2125, lr_0 = 1.4798e-04
Loss = 9.3248e-04, PNorm = 154.4636, GNorm = 0.0980, lr_0 = 1.4788e-04
Loss = 9.9707e-04, PNorm = 154.4661, GNorm = 0.0406, lr_0 = 1.4778e-04
Loss = 1.7623e-03, PNorm = 154.4669, GNorm = 0.0512, lr_0 = 1.4768e-04
Loss = 1.2482e-03, PNorm = 154.4687, GNorm = 0.0712, lr_0 = 1.4758e-04
Loss = 1.3585e-03, PNorm = 154.4716, GNorm = 0.1628, lr_0 = 1.4748e-04
Loss = 2.5654e-03, PNorm = 154.4723, GNorm = 0.0913, lr_0 = 1.4737e-04
Loss = 1.5639e-03, PNorm = 154.4747, GNorm = 0.0818, lr_0 = 1.4727e-04
Loss = 8.7253e-04, PNorm = 154.4750, GNorm = 0.3141, lr_0 = 1.4717e-04
Loss = 4.1802e-03, PNorm = 154.4769, GNorm = 0.2063, lr_0 = 1.4707e-04
Loss = 1.2741e-03, PNorm = 154.4786, GNorm = 0.0909, lr_0 = 1.4697e-04
Loss = 1.1810e-03, PNorm = 154.4809, GNorm = 0.1716, lr_0 = 1.4687e-04
Loss = 2.3219e-03, PNorm = 154.4819, GNorm = 0.0938, lr_0 = 1.4677e-04
Loss = 1.8942e-03, PNorm = 154.4833, GNorm = 0.0780, lr_0 = 1.4667e-04
Loss = 7.8960e-04, PNorm = 154.4849, GNorm = 0.0393, lr_0 = 1.4657e-04
Loss = 9.0487e-04, PNorm = 154.4864, GNorm = 0.0986, lr_0 = 1.4647e-04
Loss = 1.0125e-03, PNorm = 154.4881, GNorm = 0.0926, lr_0 = 1.4637e-04
Loss = 1.6145e-03, PNorm = 154.4893, GNorm = 0.0610, lr_0 = 1.4627e-04
Loss = 1.6503e-03, PNorm = 154.4906, GNorm = 0.3444, lr_0 = 1.4617e-04
Loss = 9.6924e-04, PNorm = 154.4917, GNorm = 0.0593, lr_0 = 1.4607e-04
Loss = 1.0456e-03, PNorm = 154.4938, GNorm = 0.1356, lr_0 = 1.4597e-04
Loss = 8.5024e-04, PNorm = 154.4963, GNorm = 0.0858, lr_0 = 1.4587e-04
Loss = 7.4973e-04, PNorm = 154.4992, GNorm = 0.1764, lr_0 = 1.4577e-04
Loss = 2.2364e-03, PNorm = 154.5027, GNorm = 0.0577, lr_0 = 1.4567e-04
Loss = 1.6113e-03, PNorm = 154.5044, GNorm = 0.1040, lr_0 = 1.4557e-04
Loss = 1.4601e-03, PNorm = 154.5053, GNorm = 0.1420, lr_0 = 1.4547e-04
Loss = 8.7150e-04, PNorm = 154.5079, GNorm = 0.0612, lr_0 = 1.4537e-04
Loss = 1.1415e-03, PNorm = 154.5089, GNorm = 0.1950, lr_0 = 1.4527e-04
Loss = 8.8762e-04, PNorm = 154.5115, GNorm = 0.0422, lr_0 = 1.4517e-04
Loss = 5.2461e-03, PNorm = 154.5147, GNorm = 0.0669, lr_0 = 1.4507e-04
Loss = 1.3589e-03, PNorm = 154.5166, GNorm = 0.0928, lr_0 = 1.4497e-04
Loss = 1.1592e-03, PNorm = 154.5199, GNorm = 0.3597, lr_0 = 1.4487e-04
Loss = 1.9057e-03, PNorm = 154.5220, GNorm = 0.3795, lr_0 = 1.4477e-04
Loss = 9.5720e-04, PNorm = 154.5257, GNorm = 0.1084, lr_0 = 1.4467e-04
Loss = 1.0359e-03, PNorm = 154.5286, GNorm = 0.1763, lr_0 = 1.4457e-04
Loss = 1.7166e-03, PNorm = 154.5311, GNorm = 0.0468, lr_0 = 1.4447e-04
Loss = 8.4220e-04, PNorm = 154.5327, GNorm = 0.1555, lr_0 = 1.4438e-04
Loss = 9.0557e-04, PNorm = 154.5353, GNorm = 0.1350, lr_0 = 1.4428e-04
Loss = 1.8149e-03, PNorm = 154.5352, GNorm = 0.0920, lr_0 = 1.4418e-04
Loss = 7.6950e-04, PNorm = 154.5354, GNorm = 0.0823, lr_0 = 1.4408e-04
Loss = 1.2180e-03, PNorm = 154.5351, GNorm = 0.1434, lr_0 = 1.4398e-04
Loss = 1.0423e-03, PNorm = 154.5368, GNorm = 0.0365, lr_0 = 1.4388e-04
Loss = 1.1638e-03, PNorm = 154.5402, GNorm = 0.0252, lr_0 = 1.4378e-04
Loss = 1.7983e-03, PNorm = 154.5422, GNorm = 0.3621, lr_0 = 1.4368e-04
Loss = 9.1189e-04, PNorm = 154.5431, GNorm = 0.1164, lr_0 = 1.4359e-04
Loss = 1.8077e-03, PNorm = 154.5462, GNorm = 0.0677, lr_0 = 1.4349e-04
Loss = 1.5836e-03, PNorm = 154.5479, GNorm = 0.2994, lr_0 = 1.4339e-04
Loss = 2.5680e-03, PNorm = 154.5494, GNorm = 0.0765, lr_0 = 1.4329e-04
Loss = 2.6774e-03, PNorm = 154.5518, GNorm = 0.0631, lr_0 = 1.4319e-04
Loss = 1.1863e-03, PNorm = 154.5509, GNorm = 0.1967, lr_0 = 1.4310e-04
Loss = 8.9581e-04, PNorm = 154.5537, GNorm = 0.0603, lr_0 = 1.4300e-04
Loss = 1.9991e-03, PNorm = 154.5571, GNorm = 0.0677, lr_0 = 1.4290e-04
Loss = 1.2629e-03, PNorm = 154.5594, GNorm = 0.0959, lr_0 = 1.4280e-04
Loss = 1.0156e-03, PNorm = 154.5611, GNorm = 0.1583, lr_0 = 1.4270e-04
Loss = 1.0383e-03, PNorm = 154.5634, GNorm = 0.0854, lr_0 = 1.4261e-04
Loss = 1.2368e-03, PNorm = 154.5648, GNorm = 0.2977, lr_0 = 1.4251e-04
Loss = 9.1765e-04, PNorm = 154.5665, GNorm = 0.0484, lr_0 = 1.4241e-04
Loss = 9.4406e-04, PNorm = 154.5690, GNorm = 0.1096, lr_0 = 1.4231e-04
Loss = 8.7473e-04, PNorm = 154.5708, GNorm = 0.0602, lr_0 = 1.4222e-04
Loss = 1.1367e-03, PNorm = 154.5733, GNorm = 0.0877, lr_0 = 1.4212e-04
Loss = 2.3190e-03, PNorm = 154.5755, GNorm = 0.0695, lr_0 = 1.4202e-04
Loss = 2.4598e-03, PNorm = 154.5750, GNorm = 0.0628, lr_0 = 1.4192e-04
Loss = 8.9681e-04, PNorm = 154.5778, GNorm = 0.1481, lr_0 = 1.4183e-04
Loss = 1.1111e-03, PNorm = 154.5807, GNorm = 0.1547, lr_0 = 1.4173e-04
Loss = 2.4931e-03, PNorm = 154.5847, GNorm = 0.2042, lr_0 = 1.4163e-04
Loss = 1.0192e-03, PNorm = 154.5881, GNorm = 0.0501, lr_0 = 1.4153e-04
Loss = 1.1719e-03, PNorm = 154.5904, GNorm = 0.0924, lr_0 = 1.4144e-04
Loss = 2.5021e-03, PNorm = 154.5925, GNorm = 0.0447, lr_0 = 1.4134e-04
Loss = 1.7544e-03, PNorm = 154.5942, GNorm = 0.0944, lr_0 = 1.4124e-04
Loss = 2.4105e-03, PNorm = 154.5957, GNorm = 0.0682, lr_0 = 1.4115e-04
Loss = 1.3459e-03, PNorm = 154.5988, GNorm = 0.2215, lr_0 = 1.4105e-04
Loss = 1.5644e-03, PNorm = 154.6013, GNorm = 0.0986, lr_0 = 1.4095e-04
Loss = 9.9451e-04, PNorm = 154.6039, GNorm = 0.0309, lr_0 = 1.4086e-04
Loss = 9.8079e-04, PNorm = 154.6046, GNorm = 0.1041, lr_0 = 1.4076e-04
Loss = 2.0154e-03, PNorm = 154.6065, GNorm = 0.0608, lr_0 = 1.4066e-04
Loss = 1.7693e-03, PNorm = 154.6073, GNorm = 0.1051, lr_0 = 1.4057e-04
Loss = 1.6443e-03, PNorm = 154.6113, GNorm = 0.1059, lr_0 = 1.4047e-04
Loss = 7.8521e-04, PNorm = 154.6124, GNorm = 0.0723, lr_0 = 1.4038e-04
Loss = 1.5639e-03, PNorm = 154.6159, GNorm = 0.1566, lr_0 = 1.4028e-04
Loss = 1.3190e-03, PNorm = 154.6192, GNorm = 0.1199, lr_0 = 1.4018e-04
Loss = 9.2159e-04, PNorm = 154.6222, GNorm = 0.0522, lr_0 = 1.4009e-04
Loss = 1.9471e-03, PNorm = 154.6255, GNorm = 0.1187, lr_0 = 1.3999e-04
Loss = 1.6053e-03, PNorm = 154.6289, GNorm = 0.2710, lr_0 = 1.3990e-04
Loss = 1.0491e-03, PNorm = 154.6311, GNorm = 0.0836, lr_0 = 1.3980e-04
Loss = 1.3914e-03, PNorm = 154.6335, GNorm = 0.0969, lr_0 = 1.3970e-04
Loss = 2.8672e-03, PNorm = 154.6350, GNorm = 0.0772, lr_0 = 1.3961e-04
Loss = 2.9947e-03, PNorm = 154.6371, GNorm = 0.2242, lr_0 = 1.3951e-04
Loss = 1.8975e-03, PNorm = 154.6397, GNorm = 0.4416, lr_0 = 1.3942e-04
Loss = 9.2678e-04, PNorm = 154.6411, GNorm = 0.2076, lr_0 = 1.3932e-04
Loss = 2.1748e-03, PNorm = 154.6412, GNorm = 0.3764, lr_0 = 1.3923e-04
Loss = 2.8612e-03, PNorm = 154.6406, GNorm = 0.0555, lr_0 = 1.3913e-04
Loss = 1.5644e-03, PNorm = 154.6417, GNorm = 0.1497, lr_0 = 1.3904e-04
Loss = 1.0046e-03, PNorm = 154.6448, GNorm = 0.0803, lr_0 = 1.3894e-04
Validation mae = 0.476419
Epoch 26
Loss = 2.0790e-03, PNorm = 154.6447, GNorm = 0.1012, lr_0 = 1.3884e-04
Loss = 7.2724e-04, PNorm = 154.6457, GNorm = 0.0497, lr_0 = 1.3875e-04
Loss = 1.4044e-03, PNorm = 154.6455, GNorm = 0.2087, lr_0 = 1.3865e-04
Loss = 1.0173e-03, PNorm = 154.6459, GNorm = 0.2442, lr_0 = 1.3856e-04
Loss = 1.4077e-03, PNorm = 154.6483, GNorm = 0.0538, lr_0 = 1.3846e-04
Loss = 7.8483e-04, PNorm = 154.6509, GNorm = 0.1569, lr_0 = 1.3837e-04
Loss = 9.4791e-04, PNorm = 154.6532, GNorm = 0.1475, lr_0 = 1.3828e-04
Loss = 1.1611e-03, PNorm = 154.6538, GNorm = 0.0366, lr_0 = 1.3818e-04
Loss = 1.4285e-03, PNorm = 154.6544, GNorm = 0.0699, lr_0 = 1.3809e-04
Loss = 7.1347e-04, PNorm = 154.6573, GNorm = 0.0327, lr_0 = 1.3799e-04
Loss = 1.8832e-03, PNorm = 154.6590, GNorm = 0.0877, lr_0 = 1.3790e-04
Loss = 2.2358e-03, PNorm = 154.6634, GNorm = 0.1616, lr_0 = 1.3780e-04
Loss = 7.6096e-04, PNorm = 154.6656, GNorm = 0.1473, lr_0 = 1.3771e-04
Loss = 1.4314e-03, PNorm = 154.6669, GNorm = 0.0691, lr_0 = 1.3761e-04
Loss = 3.2133e-03, PNorm = 154.6684, GNorm = 0.1340, lr_0 = 1.3752e-04
Loss = 1.2507e-03, PNorm = 154.6702, GNorm = 0.0903, lr_0 = 1.3742e-04
Loss = 1.3638e-03, PNorm = 154.6716, GNorm = 0.3875, lr_0 = 1.3733e-04
Loss = 1.5979e-03, PNorm = 154.6738, GNorm = 0.0485, lr_0 = 1.3724e-04
Loss = 8.0148e-04, PNorm = 154.6762, GNorm = 0.1632, lr_0 = 1.3714e-04
Loss = 1.0891e-03, PNorm = 154.6797, GNorm = 0.1217, lr_0 = 1.3705e-04
Loss = 3.3324e-03, PNorm = 154.6815, GNorm = 0.1327, lr_0 = 1.3695e-04
Loss = 8.7293e-04, PNorm = 154.6826, GNorm = 0.1125, lr_0 = 1.3686e-04
Loss = 9.2593e-04, PNorm = 154.6836, GNorm = 0.1092, lr_0 = 1.3677e-04
Loss = 3.8840e-03, PNorm = 154.6854, GNorm = 0.1386, lr_0 = 1.3667e-04
Loss = 2.0829e-03, PNorm = 154.6879, GNorm = 0.1742, lr_0 = 1.3658e-04
Loss = 1.5036e-03, PNorm = 154.6895, GNorm = 0.0902, lr_0 = 1.3649e-04
Loss = 7.9918e-04, PNorm = 154.6912, GNorm = 0.0674, lr_0 = 1.3639e-04
Loss = 1.0104e-03, PNorm = 154.6934, GNorm = 0.1985, lr_0 = 1.3630e-04
Loss = 7.1427e-04, PNorm = 154.6964, GNorm = 0.0639, lr_0 = 1.3621e-04
Loss = 1.0638e-03, PNorm = 154.6991, GNorm = 0.1012, lr_0 = 1.3611e-04
Loss = 8.9209e-04, PNorm = 154.7000, GNorm = 0.0887, lr_0 = 1.3602e-04
Loss = 2.3504e-03, PNorm = 154.7009, GNorm = 0.1757, lr_0 = 1.3593e-04
Loss = 7.7652e-04, PNorm = 154.7046, GNorm = 0.0999, lr_0 = 1.3583e-04
Loss = 1.6403e-03, PNorm = 154.7080, GNorm = 0.2990, lr_0 = 1.3574e-04
Loss = 1.7657e-03, PNorm = 154.7096, GNorm = 0.1050, lr_0 = 1.3565e-04
Loss = 7.8008e-04, PNorm = 154.7117, GNorm = 0.1653, lr_0 = 1.3555e-04
Loss = 7.3209e-04, PNorm = 154.7132, GNorm = 0.0401, lr_0 = 1.3546e-04
Loss = 7.6599e-04, PNorm = 154.7134, GNorm = 0.0502, lr_0 = 1.3537e-04
Loss = 1.2440e-03, PNorm = 154.7134, GNorm = 0.0858, lr_0 = 1.3528e-04
Loss = 8.4835e-04, PNorm = 154.7143, GNorm = 0.0945, lr_0 = 1.3518e-04
Loss = 2.8169e-03, PNorm = 154.7160, GNorm = 0.3154, lr_0 = 1.3509e-04
Loss = 9.5640e-04, PNorm = 154.7187, GNorm = 0.0417, lr_0 = 1.3500e-04
Loss = 7.7764e-04, PNorm = 154.7210, GNorm = 0.0440, lr_0 = 1.3491e-04
Loss = 1.4110e-03, PNorm = 154.7228, GNorm = 0.0341, lr_0 = 1.3481e-04
Loss = 1.7827e-03, PNorm = 154.7254, GNorm = 0.0439, lr_0 = 1.3472e-04
Loss = 1.5498e-03, PNorm = 154.7261, GNorm = 0.1061, lr_0 = 1.3463e-04
Loss = 1.4654e-03, PNorm = 154.7270, GNorm = 0.1486, lr_0 = 1.3454e-04
Loss = 6.7331e-04, PNorm = 154.7284, GNorm = 0.0452, lr_0 = 1.3444e-04
Loss = 6.4140e-04, PNorm = 154.7307, GNorm = 0.1129, lr_0 = 1.3435e-04
Loss = 1.2641e-03, PNorm = 154.7329, GNorm = 0.3237, lr_0 = 1.3426e-04
Loss = 1.0361e-03, PNorm = 154.7352, GNorm = 0.1009, lr_0 = 1.3417e-04
Loss = 1.5239e-03, PNorm = 154.7366, GNorm = 0.0744, lr_0 = 1.3408e-04
Loss = 9.6071e-04, PNorm = 154.7395, GNorm = 0.0959, lr_0 = 1.3398e-04
Loss = 1.3361e-03, PNorm = 154.7415, GNorm = 0.1224, lr_0 = 1.3389e-04
Loss = 1.9777e-03, PNorm = 154.7428, GNorm = 0.1392, lr_0 = 1.3380e-04
Loss = 2.0602e-03, PNorm = 154.7449, GNorm = 0.1925, lr_0 = 1.3371e-04
Loss = 7.7359e-04, PNorm = 154.7481, GNorm = 0.1341, lr_0 = 1.3362e-04
Loss = 1.7654e-03, PNorm = 154.7505, GNorm = 0.0661, lr_0 = 1.3353e-04
Loss = 7.1932e-04, PNorm = 154.7539, GNorm = 0.0511, lr_0 = 1.3343e-04
Loss = 1.0250e-03, PNorm = 154.7551, GNorm = 0.0917, lr_0 = 1.3334e-04
Loss = 8.7937e-04, PNorm = 154.7582, GNorm = 0.1420, lr_0 = 1.3325e-04
Loss = 9.5338e-04, PNorm = 154.7621, GNorm = 0.1065, lr_0 = 1.3316e-04
Loss = 1.0592e-03, PNorm = 154.7630, GNorm = 0.0500, lr_0 = 1.3307e-04
Loss = 1.3906e-03, PNorm = 154.7637, GNorm = 0.1083, lr_0 = 1.3298e-04
Loss = 6.6437e-04, PNorm = 154.7651, GNorm = 0.0327, lr_0 = 1.3289e-04
Loss = 2.4166e-03, PNorm = 154.7667, GNorm = 0.1854, lr_0 = 1.3280e-04
Loss = 4.7859e-03, PNorm = 154.7670, GNorm = 0.2226, lr_0 = 1.3270e-04
Loss = 1.3476e-03, PNorm = 154.7706, GNorm = 0.0822, lr_0 = 1.3261e-04
Loss = 1.5711e-03, PNorm = 154.7716, GNorm = 0.1349, lr_0 = 1.3252e-04
Loss = 2.3293e-03, PNorm = 154.7719, GNorm = 0.1621, lr_0 = 1.3243e-04
Loss = 1.7146e-03, PNorm = 154.7725, GNorm = 0.0744, lr_0 = 1.3234e-04
Loss = 2.0244e-03, PNorm = 154.7741, GNorm = 0.0486, lr_0 = 1.3225e-04
Loss = 7.9245e-04, PNorm = 154.7747, GNorm = 0.1501, lr_0 = 1.3216e-04
Loss = 6.1447e-04, PNorm = 154.7779, GNorm = 0.0727, lr_0 = 1.3207e-04
Loss = 2.0097e-03, PNorm = 154.7800, GNorm = 0.0954, lr_0 = 1.3198e-04
Loss = 8.9176e-04, PNorm = 154.7814, GNorm = 0.0670, lr_0 = 1.3189e-04
Loss = 1.5607e-03, PNorm = 154.7822, GNorm = 0.0813, lr_0 = 1.3180e-04
Loss = 1.1422e-03, PNorm = 154.7827, GNorm = 0.0635, lr_0 = 1.3171e-04
Loss = 6.8889e-04, PNorm = 154.7834, GNorm = 0.0855, lr_0 = 1.3162e-04
Loss = 1.0669e-03, PNorm = 154.7839, GNorm = 0.0663, lr_0 = 1.3153e-04
Loss = 1.5477e-03, PNorm = 154.7842, GNorm = 0.0845, lr_0 = 1.3144e-04
Loss = 1.4580e-03, PNorm = 154.7850, GNorm = 0.1407, lr_0 = 1.3135e-04
Loss = 9.5536e-04, PNorm = 154.7866, GNorm = 0.0415, lr_0 = 1.3126e-04
Loss = 2.1602e-03, PNorm = 154.7893, GNorm = 0.1545, lr_0 = 1.3117e-04
Loss = 2.7971e-03, PNorm = 154.7897, GNorm = 0.1619, lr_0 = 1.3108e-04
Loss = 1.3124e-03, PNorm = 154.7925, GNorm = 0.1205, lr_0 = 1.3099e-04
Loss = 1.9549e-03, PNorm = 154.7936, GNorm = 0.1786, lr_0 = 1.3090e-04
Loss = 6.8251e-04, PNorm = 154.7980, GNorm = 0.0287, lr_0 = 1.3081e-04
Loss = 1.0762e-03, PNorm = 154.7998, GNorm = 0.0654, lr_0 = 1.3072e-04
Loss = 6.4238e-04, PNorm = 154.8027, GNorm = 0.0854, lr_0 = 1.3063e-04
Loss = 1.4006e-03, PNorm = 154.8063, GNorm = 0.1105, lr_0 = 1.3054e-04
Loss = 9.3975e-04, PNorm = 154.8083, GNorm = 0.0551, lr_0 = 1.3045e-04
Loss = 1.6844e-03, PNorm = 154.8086, GNorm = 0.1799, lr_0 = 1.3036e-04
Loss = 8.9811e-04, PNorm = 154.8090, GNorm = 0.0290, lr_0 = 1.3027e-04
Loss = 1.3686e-03, PNorm = 154.8121, GNorm = 0.1336, lr_0 = 1.3018e-04
Loss = 9.0024e-04, PNorm = 154.8143, GNorm = 0.0374, lr_0 = 1.3009e-04
Loss = 7.0333e-04, PNorm = 154.8162, GNorm = 0.1198, lr_0 = 1.3000e-04
Loss = 7.1829e-04, PNorm = 154.8180, GNorm = 0.1105, lr_0 = 1.2992e-04
Loss = 1.4348e-03, PNorm = 154.8189, GNorm = 0.0661, lr_0 = 1.2983e-04
Loss = 1.8131e-03, PNorm = 154.8189, GNorm = 0.1071, lr_0 = 1.2974e-04
Loss = 2.6614e-03, PNorm = 154.8209, GNorm = 0.1491, lr_0 = 1.2965e-04
Loss = 1.3003e-03, PNorm = 154.8229, GNorm = 0.0613, lr_0 = 1.2956e-04
Loss = 6.4355e-04, PNorm = 154.8251, GNorm = 0.0321, lr_0 = 1.2947e-04
Loss = 1.0668e-03, PNorm = 154.8262, GNorm = 0.1886, lr_0 = 1.2938e-04
Loss = 2.2310e-03, PNorm = 154.8277, GNorm = 0.0712, lr_0 = 1.2929e-04
Loss = 1.0294e-03, PNorm = 154.8287, GNorm = 0.0429, lr_0 = 1.2921e-04
Loss = 7.3297e-04, PNorm = 154.8303, GNorm = 0.0645, lr_0 = 1.2912e-04
Loss = 5.8255e-04, PNorm = 154.8319, GNorm = 0.0568, lr_0 = 1.2903e-04
Loss = 8.3159e-04, PNorm = 154.8336, GNorm = 0.1498, lr_0 = 1.2894e-04
Loss = 2.6082e-03, PNorm = 154.8361, GNorm = 0.0676, lr_0 = 1.2885e-04
Loss = 6.0693e-04, PNorm = 154.8388, GNorm = 0.0978, lr_0 = 1.2876e-04
Loss = 1.6270e-03, PNorm = 154.8387, GNorm = 0.0803, lr_0 = 1.2867e-04
Loss = 1.3084e-03, PNorm = 154.8394, GNorm = 0.1197, lr_0 = 1.2859e-04
Loss = 1.4236e-03, PNorm = 154.8401, GNorm = 0.0801, lr_0 = 1.2850e-04
Loss = 1.4246e-03, PNorm = 154.8414, GNorm = 0.0566, lr_0 = 1.2841e-04
Loss = 2.8962e-03, PNorm = 154.8434, GNorm = 0.0731, lr_0 = 1.2832e-04
Loss = 8.0144e-04, PNorm = 154.8447, GNorm = 0.0274, lr_0 = 1.2823e-04
Loss = 8.4634e-04, PNorm = 154.8475, GNorm = 0.0486, lr_0 = 1.2815e-04
Loss = 1.9970e-03, PNorm = 154.8491, GNorm = 0.0206, lr_0 = 1.2806e-04
Loss = 3.3859e-03, PNorm = 154.8537, GNorm = 0.1919, lr_0 = 1.2797e-04
Validation mae = 0.474771
Epoch 27
Loss = 1.1301e-03, PNorm = 154.8553, GNorm = 0.1334, lr_0 = 1.2788e-04
Loss = 1.0788e-03, PNorm = 154.8558, GNorm = 0.1262, lr_0 = 1.2780e-04
Loss = 1.1592e-03, PNorm = 154.8564, GNorm = 0.1052, lr_0 = 1.2771e-04
Loss = 8.2729e-04, PNorm = 154.8583, GNorm = 0.0730, lr_0 = 1.2762e-04
Loss = 1.9054e-03, PNorm = 154.8604, GNorm = 0.0801, lr_0 = 1.2753e-04
Loss = 1.2290e-03, PNorm = 154.8633, GNorm = 0.1040, lr_0 = 1.2745e-04
Loss = 2.4380e-03, PNorm = 154.8640, GNorm = 0.0660, lr_0 = 1.2736e-04
Loss = 7.1355e-04, PNorm = 154.8652, GNorm = 0.0602, lr_0 = 1.2727e-04
Loss = 9.7684e-04, PNorm = 154.8678, GNorm = 0.0870, lr_0 = 1.2718e-04
Loss = 1.8390e-03, PNorm = 154.8694, GNorm = 0.0523, lr_0 = 1.2710e-04
Loss = 7.5803e-04, PNorm = 154.8715, GNorm = 0.1225, lr_0 = 1.2701e-04
Loss = 8.1493e-04, PNorm = 154.8719, GNorm = 0.0778, lr_0 = 1.2692e-04
Loss = 8.0759e-04, PNorm = 154.8734, GNorm = 0.1531, lr_0 = 1.2684e-04
Loss = 1.8608e-03, PNorm = 154.8747, GNorm = 0.0442, lr_0 = 1.2675e-04
Loss = 1.5935e-03, PNorm = 154.8789, GNorm = 0.1063, lr_0 = 1.2666e-04
Loss = 1.0483e-03, PNorm = 154.8809, GNorm = 0.0304, lr_0 = 1.2658e-04
Loss = 2.1037e-03, PNorm = 154.8821, GNorm = 0.1199, lr_0 = 1.2649e-04
Loss = 1.0035e-03, PNorm = 154.8839, GNorm = 0.0409, lr_0 = 1.2640e-04
Loss = 6.3303e-04, PNorm = 154.8844, GNorm = 0.0667, lr_0 = 1.2632e-04
Loss = 8.6736e-04, PNorm = 154.8853, GNorm = 0.0259, lr_0 = 1.2623e-04
Loss = 7.7650e-04, PNorm = 154.8855, GNorm = 0.0944, lr_0 = 1.2614e-04
Loss = 1.2924e-03, PNorm = 154.8870, GNorm = 0.1071, lr_0 = 1.2606e-04
Loss = 6.7973e-04, PNorm = 154.8886, GNorm = 0.0540, lr_0 = 1.2597e-04
Loss = 6.2361e-04, PNorm = 154.8896, GNorm = 0.0627, lr_0 = 1.2588e-04
Loss = 6.1277e-04, PNorm = 154.8917, GNorm = 0.1072, lr_0 = 1.2580e-04
Loss = 1.2519e-03, PNorm = 154.8942, GNorm = 0.0717, lr_0 = 1.2571e-04
Loss = 4.1507e-03, PNorm = 154.8953, GNorm = 0.0517, lr_0 = 1.2563e-04
Loss = 1.7865e-03, PNorm = 154.8974, GNorm = 0.1459, lr_0 = 1.2554e-04
Loss = 1.4363e-03, PNorm = 154.8974, GNorm = 0.1856, lr_0 = 1.2545e-04
Loss = 1.3886e-03, PNorm = 154.8976, GNorm = 0.0654, lr_0 = 1.2537e-04
Loss = 1.3111e-03, PNorm = 154.8991, GNorm = 0.0629, lr_0 = 1.2528e-04
Loss = 1.8160e-03, PNorm = 154.8991, GNorm = 0.1323, lr_0 = 1.2520e-04
Loss = 8.8887e-04, PNorm = 154.9001, GNorm = 0.0595, lr_0 = 1.2511e-04
Loss = 6.0070e-04, PNorm = 154.9011, GNorm = 0.0496, lr_0 = 1.2502e-04
Loss = 9.0383e-04, PNorm = 154.9031, GNorm = 0.1604, lr_0 = 1.2494e-04
Loss = 6.3484e-04, PNorm = 154.9058, GNorm = 0.0721, lr_0 = 1.2485e-04
Loss = 7.2175e-04, PNorm = 154.9087, GNorm = 0.0335, lr_0 = 1.2477e-04
Loss = 6.4442e-04, PNorm = 154.9102, GNorm = 0.1189, lr_0 = 1.2468e-04
Loss = 2.1241e-03, PNorm = 154.9109, GNorm = 0.0767, lr_0 = 1.2460e-04
Loss = 9.5178e-04, PNorm = 154.9106, GNorm = 0.0881, lr_0 = 1.2451e-04
Loss = 1.0217e-03, PNorm = 154.9120, GNorm = 0.2328, lr_0 = 1.2443e-04
Loss = 1.0073e-03, PNorm = 154.9150, GNorm = 0.2033, lr_0 = 1.2434e-04
Loss = 1.0535e-03, PNorm = 154.9170, GNorm = 0.0398, lr_0 = 1.2426e-04
Loss = 2.4884e-03, PNorm = 154.9166, GNorm = 0.2135, lr_0 = 1.2417e-04
Loss = 1.6312e-03, PNorm = 154.9171, GNorm = 0.2326, lr_0 = 1.2409e-04
Loss = 6.3125e-04, PNorm = 154.9190, GNorm = 0.0998, lr_0 = 1.2400e-04
Loss = 1.0503e-03, PNorm = 154.9199, GNorm = 0.0359, lr_0 = 1.2392e-04
Loss = 6.4939e-04, PNorm = 154.9222, GNorm = 0.0460, lr_0 = 1.2383e-04
Loss = 8.8268e-04, PNorm = 154.9225, GNorm = 0.1192, lr_0 = 1.2375e-04
Loss = 1.2470e-03, PNorm = 154.9227, GNorm = 0.1167, lr_0 = 1.2366e-04
Loss = 6.7911e-04, PNorm = 154.9249, GNorm = 0.0562, lr_0 = 1.2358e-04
Loss = 7.8717e-04, PNorm = 154.9268, GNorm = 0.0711, lr_0 = 1.2349e-04
Loss = 7.7104e-04, PNorm = 154.9286, GNorm = 0.0616, lr_0 = 1.2341e-04
Loss = 2.7045e-03, PNorm = 154.9302, GNorm = 0.0209, lr_0 = 1.2332e-04
Loss = 8.6972e-04, PNorm = 154.9325, GNorm = 0.0741, lr_0 = 1.2324e-04
Loss = 2.5181e-03, PNorm = 154.9352, GNorm = 0.0249, lr_0 = 1.2315e-04
Loss = 1.1425e-03, PNorm = 154.9380, GNorm = 0.2790, lr_0 = 1.2307e-04
Loss = 1.1068e-03, PNorm = 154.9366, GNorm = 0.0312, lr_0 = 1.2298e-04
Loss = 6.5786e-04, PNorm = 154.9383, GNorm = 0.1029, lr_0 = 1.2290e-04
Loss = 7.3362e-04, PNorm = 154.9395, GNorm = 0.0475, lr_0 = 1.2282e-04
Loss = 1.4358e-03, PNorm = 154.9410, GNorm = 0.0497, lr_0 = 1.2273e-04
Loss = 1.2134e-03, PNorm = 154.9423, GNorm = 0.0806, lr_0 = 1.2265e-04
Loss = 8.2600e-04, PNorm = 154.9436, GNorm = 0.2901, lr_0 = 1.2256e-04
Loss = 6.8396e-04, PNorm = 154.9450, GNorm = 0.1621, lr_0 = 1.2248e-04
Loss = 1.4821e-03, PNorm = 154.9471, GNorm = 0.1921, lr_0 = 1.2240e-04
Loss = 1.0054e-03, PNorm = 154.9498, GNorm = 0.0736, lr_0 = 1.2231e-04
Loss = 1.0999e-03, PNorm = 154.9527, GNorm = 0.4242, lr_0 = 1.2223e-04
Loss = 2.1179e-03, PNorm = 154.9532, GNorm = 0.0869, lr_0 = 1.2214e-04
Loss = 2.6419e-03, PNorm = 154.9565, GNorm = 0.0889, lr_0 = 1.2206e-04
Loss = 1.8176e-03, PNorm = 154.9575, GNorm = 0.0727, lr_0 = 1.2198e-04
Loss = 1.0752e-03, PNorm = 154.9582, GNorm = 0.0263, lr_0 = 1.2189e-04
Loss = 1.6254e-03, PNorm = 154.9604, GNorm = 0.2352, lr_0 = 1.2181e-04
Loss = 9.9389e-04, PNorm = 154.9609, GNorm = 0.1275, lr_0 = 1.2173e-04
Loss = 1.1972e-03, PNorm = 154.9602, GNorm = 0.1100, lr_0 = 1.2164e-04
Loss = 1.2482e-03, PNorm = 154.9604, GNorm = 0.1308, lr_0 = 1.2156e-04
Loss = 1.3364e-03, PNorm = 154.9620, GNorm = 0.0471, lr_0 = 1.2148e-04
Loss = 1.8522e-03, PNorm = 154.9643, GNorm = 0.1843, lr_0 = 1.2139e-04
Loss = 1.6779e-03, PNorm = 154.9662, GNorm = 0.0769, lr_0 = 1.2131e-04
Loss = 1.9721e-03, PNorm = 154.9702, GNorm = 0.0936, lr_0 = 1.2123e-04
Loss = 7.9015e-04, PNorm = 154.9724, GNorm = 0.0813, lr_0 = 1.2114e-04
Loss = 1.2691e-03, PNorm = 154.9744, GNorm = 0.3232, lr_0 = 1.2106e-04
Loss = 8.1395e-04, PNorm = 154.9761, GNorm = 0.0472, lr_0 = 1.2098e-04
Loss = 8.4525e-04, PNorm = 154.9779, GNorm = 0.1593, lr_0 = 1.2090e-04
Loss = 2.3255e-03, PNorm = 154.9805, GNorm = 0.1891, lr_0 = 1.2081e-04
Loss = 9.7072e-04, PNorm = 154.9826, GNorm = 0.0685, lr_0 = 1.2073e-04
Loss = 2.8857e-03, PNorm = 154.9854, GNorm = 0.1601, lr_0 = 1.2065e-04
Loss = 5.7624e-04, PNorm = 154.9879, GNorm = 0.0269, lr_0 = 1.2056e-04
Loss = 8.0035e-04, PNorm = 154.9893, GNorm = 0.0689, lr_0 = 1.2048e-04
Loss = 6.4813e-04, PNorm = 154.9913, GNorm = 0.1629, lr_0 = 1.2040e-04
Loss = 9.2716e-04, PNorm = 154.9918, GNorm = 0.1710, lr_0 = 1.2032e-04
Loss = 5.3319e-04, PNorm = 154.9920, GNorm = 0.0278, lr_0 = 1.2023e-04
Loss = 5.7230e-04, PNorm = 154.9935, GNorm = 0.1468, lr_0 = 1.2015e-04
Loss = 1.6735e-03, PNorm = 154.9943, GNorm = 0.1856, lr_0 = 1.2007e-04
Loss = 1.8687e-03, PNorm = 154.9954, GNorm = 0.0584, lr_0 = 1.1999e-04
Loss = 1.7164e-03, PNorm = 154.9964, GNorm = 0.1323, lr_0 = 1.1991e-04
Loss = 6.5837e-04, PNorm = 155.0002, GNorm = 0.0863, lr_0 = 1.1982e-04
Loss = 1.0001e-03, PNorm = 155.0028, GNorm = 0.0399, lr_0 = 1.1974e-04
Loss = 1.9194e-03, PNorm = 155.0037, GNorm = 0.0620, lr_0 = 1.1966e-04
Loss = 2.4065e-03, PNorm = 155.0036, GNorm = 0.0493, lr_0 = 1.1958e-04
Loss = 8.8201e-04, PNorm = 155.0039, GNorm = 0.1808, lr_0 = 1.1950e-04
Loss = 6.7040e-04, PNorm = 155.0059, GNorm = 0.0405, lr_0 = 1.1941e-04
Loss = 1.9836e-03, PNorm = 155.0077, GNorm = 0.1303, lr_0 = 1.1933e-04
Loss = 1.3591e-03, PNorm = 155.0097, GNorm = 0.1224, lr_0 = 1.1925e-04
Loss = 6.7146e-04, PNorm = 155.0114, GNorm = 0.1031, lr_0 = 1.1917e-04
Loss = 6.9120e-04, PNorm = 155.0130, GNorm = 0.1100, lr_0 = 1.1909e-04
Loss = 2.5331e-03, PNorm = 155.0135, GNorm = 0.1774, lr_0 = 1.1901e-04
Loss = 1.0992e-03, PNorm = 155.0148, GNorm = 0.0378, lr_0 = 1.1892e-04
Loss = 9.6691e-04, PNorm = 155.0169, GNorm = 0.1176, lr_0 = 1.1884e-04
Loss = 1.2990e-03, PNorm = 155.0190, GNorm = 0.0356, lr_0 = 1.1876e-04
Loss = 5.8468e-04, PNorm = 155.0192, GNorm = 0.0514, lr_0 = 1.1868e-04
Loss = 3.9593e-03, PNorm = 155.0212, GNorm = 0.0820, lr_0 = 1.1860e-04
Loss = 7.3138e-04, PNorm = 155.0232, GNorm = 0.1251, lr_0 = 1.1852e-04
Loss = 7.7791e-04, PNorm = 155.0259, GNorm = 0.0622, lr_0 = 1.1844e-04
Loss = 1.7664e-03, PNorm = 155.0268, GNorm = 0.2716, lr_0 = 1.1835e-04
Loss = 1.8190e-03, PNorm = 155.0286, GNorm = 0.0631, lr_0 = 1.1827e-04
Loss = 3.3908e-03, PNorm = 155.0308, GNorm = 0.0699, lr_0 = 1.1819e-04
Loss = 1.1551e-03, PNorm = 155.0337, GNorm = 0.1453, lr_0 = 1.1811e-04
Loss = 5.0432e-04, PNorm = 155.0357, GNorm = 0.0563, lr_0 = 1.1803e-04
Loss = 1.3243e-03, PNorm = 155.0375, GNorm = 0.0631, lr_0 = 1.1795e-04
Loss = 1.2447e-03, PNorm = 155.0379, GNorm = 0.1413, lr_0 = 1.1787e-04
Validation mae = 0.475441
Epoch 28
Loss = 6.1766e-04, PNorm = 155.0385, GNorm = 0.0408, lr_0 = 1.1779e-04
Loss = 5.2149e-04, PNorm = 155.0387, GNorm = 0.1143, lr_0 = 1.1771e-04
Loss = 1.6747e-03, PNorm = 155.0401, GNorm = 0.1543, lr_0 = 1.1763e-04
Loss = 4.8958e-04, PNorm = 155.0409, GNorm = 0.0284, lr_0 = 1.1755e-04
Loss = 4.5566e-04, PNorm = 155.0414, GNorm = 0.0878, lr_0 = 1.1747e-04
Loss = 1.7234e-03, PNorm = 155.0408, GNorm = 0.0488, lr_0 = 1.1739e-04
Loss = 1.8064e-03, PNorm = 155.0416, GNorm = 0.0674, lr_0 = 1.1730e-04
Loss = 8.7410e-04, PNorm = 155.0419, GNorm = 0.1126, lr_0 = 1.1722e-04
Loss = 7.6838e-04, PNorm = 155.0415, GNorm = 0.0640, lr_0 = 1.1714e-04
Loss = 1.3892e-03, PNorm = 155.0417, GNorm = 0.0604, lr_0 = 1.1706e-04
Loss = 5.0321e-04, PNorm = 155.0429, GNorm = 0.0539, lr_0 = 1.1698e-04
Loss = 5.5578e-04, PNorm = 155.0443, GNorm = 0.0997, lr_0 = 1.1690e-04
Loss = 1.3379e-03, PNorm = 155.0467, GNorm = 0.0225, lr_0 = 1.1682e-04
Loss = 2.4252e-03, PNorm = 155.0485, GNorm = 0.1166, lr_0 = 1.1674e-04
Loss = 4.5725e-04, PNorm = 155.0476, GNorm = 0.0565, lr_0 = 1.1666e-04
Loss = 5.5986e-04, PNorm = 155.0492, GNorm = 0.1098, lr_0 = 1.1658e-04
Loss = 4.9837e-04, PNorm = 155.0506, GNorm = 0.0393, lr_0 = 1.1650e-04
Loss = 6.3796e-04, PNorm = 155.0506, GNorm = 0.0784, lr_0 = 1.1642e-04
Loss = 1.6619e-03, PNorm = 155.0510, GNorm = 0.0540, lr_0 = 1.1634e-04
Loss = 1.1543e-03, PNorm = 155.0511, GNorm = 0.0371, lr_0 = 1.1626e-04
Loss = 8.5114e-04, PNorm = 155.0525, GNorm = 0.1443, lr_0 = 1.1618e-04
Loss = 5.4629e-04, PNorm = 155.0539, GNorm = 0.1099, lr_0 = 1.1611e-04
Loss = 2.7399e-03, PNorm = 155.0573, GNorm = 0.2142, lr_0 = 1.1603e-04
Loss = 2.1074e-03, PNorm = 155.0590, GNorm = 0.0968, lr_0 = 1.1595e-04
Loss = 9.2230e-04, PNorm = 155.0610, GNorm = 0.0541, lr_0 = 1.1587e-04
Loss = 1.3343e-03, PNorm = 155.0632, GNorm = 0.1068, lr_0 = 1.1579e-04
Loss = 1.5765e-03, PNorm = 155.0650, GNorm = 0.0476, lr_0 = 1.1571e-04
Loss = 4.9462e-04, PNorm = 155.0672, GNorm = 0.0371, lr_0 = 1.1563e-04
Loss = 8.1055e-04, PNorm = 155.0688, GNorm = 0.1187, lr_0 = 1.1555e-04
Loss = 1.1932e-03, PNorm = 155.0692, GNorm = 0.1820, lr_0 = 1.1547e-04
Loss = 5.9156e-04, PNorm = 155.0690, GNorm = 0.0357, lr_0 = 1.1539e-04
Loss = 5.1116e-04, PNorm = 155.0675, GNorm = 0.1039, lr_0 = 1.1531e-04
Loss = 1.3008e-03, PNorm = 155.0687, GNorm = 0.0673, lr_0 = 1.1523e-04
Loss = 1.0289e-03, PNorm = 155.0695, GNorm = 0.0578, lr_0 = 1.1515e-04
Loss = 9.6755e-04, PNorm = 155.0702, GNorm = 0.0494, lr_0 = 1.1508e-04
Loss = 8.6246e-04, PNorm = 155.0710, GNorm = 0.0530, lr_0 = 1.1500e-04
Loss = 6.8063e-04, PNorm = 155.0733, GNorm = 0.1523, lr_0 = 1.1492e-04
Loss = 1.0647e-03, PNorm = 155.0744, GNorm = 0.0848, lr_0 = 1.1484e-04
Loss = 1.5879e-03, PNorm = 155.0762, GNorm = 0.2482, lr_0 = 1.1476e-04
Loss = 2.2789e-03, PNorm = 155.0766, GNorm = 0.1953, lr_0 = 1.1468e-04
Loss = 8.4321e-04, PNorm = 155.0779, GNorm = 0.1469, lr_0 = 1.1460e-04
Loss = 7.7032e-04, PNorm = 155.0787, GNorm = 0.0274, lr_0 = 1.1452e-04
Loss = 8.8421e-04, PNorm = 155.0814, GNorm = 0.1357, lr_0 = 1.1445e-04
Loss = 1.0374e-03, PNorm = 155.0825, GNorm = 0.0717, lr_0 = 1.1437e-04
Loss = 1.5907e-03, PNorm = 155.0844, GNorm = 0.0757, lr_0 = 1.1429e-04
Loss = 6.8970e-04, PNorm = 155.0854, GNorm = 0.0387, lr_0 = 1.1421e-04
Loss = 4.9978e-04, PNorm = 155.0870, GNorm = 0.0921, lr_0 = 1.1413e-04
Loss = 2.6607e-03, PNorm = 155.0879, GNorm = 0.1343, lr_0 = 1.1405e-04
Loss = 1.1476e-03, PNorm = 155.0895, GNorm = 0.0981, lr_0 = 1.1398e-04
Loss = 1.4405e-03, PNorm = 155.0923, GNorm = 0.1446, lr_0 = 1.1390e-04
Loss = 2.4681e-03, PNorm = 155.0939, GNorm = 0.5258, lr_0 = 1.1382e-04
Loss = 1.9971e-03, PNorm = 155.0942, GNorm = 0.1094, lr_0 = 1.1374e-04
Loss = 6.4452e-04, PNorm = 155.0957, GNorm = 0.0296, lr_0 = 1.1366e-04
Loss = 1.6043e-03, PNorm = 155.0969, GNorm = 0.0556, lr_0 = 1.1359e-04
Loss = 4.5572e-04, PNorm = 155.0992, GNorm = 0.0217, lr_0 = 1.1351e-04
Loss = 5.9445e-04, PNorm = 155.1003, GNorm = 0.1004, lr_0 = 1.1343e-04
Loss = 5.8224e-04, PNorm = 155.1014, GNorm = 0.0353, lr_0 = 1.1335e-04
Loss = 1.1080e-03, PNorm = 155.1018, GNorm = 0.0872, lr_0 = 1.1328e-04
Loss = 2.8829e-03, PNorm = 155.1034, GNorm = 0.1147, lr_0 = 1.1320e-04
Loss = 5.4657e-04, PNorm = 155.1039, GNorm = 0.0517, lr_0 = 1.1312e-04
Loss = 6.5244e-04, PNorm = 155.1059, GNorm = 0.1190, lr_0 = 1.1304e-04
Loss = 4.9205e-04, PNorm = 155.1066, GNorm = 0.0407, lr_0 = 1.1297e-04
Loss = 1.6227e-03, PNorm = 155.1078, GNorm = 0.0491, lr_0 = 1.1289e-04
Loss = 1.9811e-03, PNorm = 155.1102, GNorm = 0.1078, lr_0 = 1.1281e-04
Loss = 4.8951e-04, PNorm = 155.1103, GNorm = 0.1002, lr_0 = 1.1273e-04
Loss = 7.7151e-04, PNorm = 155.1108, GNorm = 0.1580, lr_0 = 1.1266e-04
Loss = 1.5256e-03, PNorm = 155.1118, GNorm = 0.1123, lr_0 = 1.1258e-04
Loss = 1.6462e-03, PNorm = 155.1133, GNorm = 0.0239, lr_0 = 1.1250e-04
Loss = 1.5465e-03, PNorm = 155.1139, GNorm = 0.0476, lr_0 = 1.1243e-04
Loss = 1.4386e-03, PNorm = 155.1155, GNorm = 0.0815, lr_0 = 1.1235e-04
Loss = 1.4765e-03, PNorm = 155.1171, GNorm = 0.1066, lr_0 = 1.1227e-04
Loss = 5.5851e-04, PNorm = 155.1192, GNorm = 0.1363, lr_0 = 1.1219e-04
Loss = 1.1728e-03, PNorm = 155.1196, GNorm = 0.0277, lr_0 = 1.1212e-04
Loss = 1.4506e-03, PNorm = 155.1218, GNorm = 0.1045, lr_0 = 1.1204e-04
Loss = 1.9904e-03, PNorm = 155.1247, GNorm = 0.0941, lr_0 = 1.1196e-04
Loss = 5.3519e-04, PNorm = 155.1268, GNorm = 0.0494, lr_0 = 1.1189e-04
Loss = 1.4578e-03, PNorm = 155.1268, GNorm = 0.0596, lr_0 = 1.1181e-04
Loss = 3.3095e-03, PNorm = 155.1267, GNorm = 0.0635, lr_0 = 1.1173e-04
Loss = 6.1707e-04, PNorm = 155.1260, GNorm = 0.1021, lr_0 = 1.1166e-04
Loss = 5.1776e-04, PNorm = 155.1272, GNorm = 0.0535, lr_0 = 1.1158e-04
Loss = 2.0588e-03, PNorm = 155.1276, GNorm = 0.0814, lr_0 = 1.1150e-04
Loss = 6.2917e-04, PNorm = 155.1303, GNorm = 0.0242, lr_0 = 1.1143e-04
Loss = 2.2362e-03, PNorm = 155.1348, GNorm = 0.1532, lr_0 = 1.1135e-04
Loss = 1.2722e-03, PNorm = 155.1360, GNorm = 0.1409, lr_0 = 1.1128e-04
Loss = 1.4434e-03, PNorm = 155.1384, GNorm = 0.1523, lr_0 = 1.1120e-04
Loss = 6.9065e-04, PNorm = 155.1377, GNorm = 0.0979, lr_0 = 1.1112e-04
Loss = 1.4078e-03, PNorm = 155.1391, GNorm = 0.1266, lr_0 = 1.1105e-04
Loss = 9.1152e-04, PNorm = 155.1401, GNorm = 0.1454, lr_0 = 1.1097e-04
Loss = 7.6637e-04, PNorm = 155.1425, GNorm = 0.1493, lr_0 = 1.1089e-04
Loss = 8.9981e-04, PNorm = 155.1455, GNorm = 0.0589, lr_0 = 1.1082e-04
Loss = 1.3443e-03, PNorm = 155.1452, GNorm = 0.0676, lr_0 = 1.1074e-04
Loss = 5.6971e-04, PNorm = 155.1461, GNorm = 0.1624, lr_0 = 1.1067e-04
Loss = 7.3884e-04, PNorm = 155.1476, GNorm = 0.0299, lr_0 = 1.1059e-04
Loss = 7.0260e-04, PNorm = 155.1491, GNorm = 0.0640, lr_0 = 1.1052e-04
Loss = 2.2657e-03, PNorm = 155.1500, GNorm = 0.3593, lr_0 = 1.1044e-04
Loss = 5.5591e-04, PNorm = 155.1496, GNorm = 0.0506, lr_0 = 1.1036e-04
Loss = 8.4785e-04, PNorm = 155.1500, GNorm = 0.1120, lr_0 = 1.1029e-04
Loss = 5.5490e-03, PNorm = 155.1507, GNorm = 0.0440, lr_0 = 1.1021e-04
Loss = 8.3299e-04, PNorm = 155.1527, GNorm = 0.0605, lr_0 = 1.1014e-04
Loss = 2.1225e-03, PNorm = 155.1531, GNorm = 0.0835, lr_0 = 1.1006e-04
Loss = 1.3608e-03, PNorm = 155.1561, GNorm = 0.1033, lr_0 = 1.0999e-04
Loss = 5.8802e-04, PNorm = 155.1569, GNorm = 0.1267, lr_0 = 1.0991e-04
Loss = 5.6448e-04, PNorm = 155.1594, GNorm = 0.0731, lr_0 = 1.0984e-04
Loss = 1.4972e-03, PNorm = 155.1617, GNorm = 0.0906, lr_0 = 1.0976e-04
Loss = 6.4278e-04, PNorm = 155.1646, GNorm = 0.1008, lr_0 = 1.0969e-04
Loss = 1.4314e-03, PNorm = 155.1674, GNorm = 0.1629, lr_0 = 1.0961e-04
Loss = 1.1737e-03, PNorm = 155.1691, GNorm = 0.0400, lr_0 = 1.0954e-04
Loss = 1.7757e-03, PNorm = 155.1700, GNorm = 0.0583, lr_0 = 1.0946e-04
Loss = 5.2845e-04, PNorm = 155.1711, GNorm = 0.0406, lr_0 = 1.0939e-04
Loss = 3.2525e-03, PNorm = 155.1728, GNorm = 0.0208, lr_0 = 1.0931e-04
Loss = 6.4019e-04, PNorm = 155.1752, GNorm = 0.1122, lr_0 = 1.0924e-04
Loss = 6.5759e-04, PNorm = 155.1752, GNorm = 0.0803, lr_0 = 1.0916e-04
Loss = 1.1908e-03, PNorm = 155.1774, GNorm = 0.0595, lr_0 = 1.0909e-04
Loss = 5.4827e-04, PNorm = 155.1779, GNorm = 0.1513, lr_0 = 1.0901e-04
Loss = 6.4687e-04, PNorm = 155.1793, GNorm = 0.0564, lr_0 = 1.0894e-04
Loss = 9.4654e-04, PNorm = 155.1804, GNorm = 0.1461, lr_0 = 1.0886e-04
Loss = 7.0425e-04, PNorm = 155.1826, GNorm = 0.0879, lr_0 = 1.0879e-04
Loss = 5.7500e-04, PNorm = 155.1849, GNorm = 0.0361, lr_0 = 1.0871e-04
Loss = 2.1813e-03, PNorm = 155.1857, GNorm = 0.0372, lr_0 = 1.0864e-04
Loss = 8.0961e-04, PNorm = 155.1865, GNorm = 0.0558, lr_0 = 1.0856e-04
Validation mae = 0.474892
Epoch 29
Loss = 1.1923e-03, PNorm = 155.1862, GNorm = 0.0799, lr_0 = 1.0849e-04
Loss = 7.2931e-04, PNorm = 155.1878, GNorm = 0.0352, lr_0 = 1.0841e-04
Loss = 1.2994e-03, PNorm = 155.1889, GNorm = 0.1014, lr_0 = 1.0834e-04
Loss = 1.5371e-03, PNorm = 155.1900, GNorm = 0.0411, lr_0 = 1.0827e-04
Loss = 9.4464e-04, PNorm = 155.1903, GNorm = 0.0787, lr_0 = 1.0819e-04
Loss = 4.6655e-04, PNorm = 155.1923, GNorm = 0.0956, lr_0 = 1.0812e-04
Loss = 4.7589e-04, PNorm = 155.1941, GNorm = 0.0521, lr_0 = 1.0804e-04
Loss = 1.4395e-03, PNorm = 155.1952, GNorm = 0.0626, lr_0 = 1.0797e-04
Loss = 9.3788e-04, PNorm = 155.1951, GNorm = 0.0429, lr_0 = 1.0790e-04
Loss = 5.9777e-04, PNorm = 155.1970, GNorm = 0.0964, lr_0 = 1.0782e-04
Loss = 4.6071e-04, PNorm = 155.1983, GNorm = 0.0222, lr_0 = 1.0775e-04
Loss = 4.4904e-04, PNorm = 155.2003, GNorm = 0.1314, lr_0 = 1.0767e-04
Loss = 9.7588e-04, PNorm = 155.2010, GNorm = 0.1061, lr_0 = 1.0760e-04
Loss = 6.1135e-04, PNorm = 155.2004, GNorm = 0.0372, lr_0 = 1.0753e-04
Loss = 1.6011e-03, PNorm = 155.2006, GNorm = 0.0964, lr_0 = 1.0745e-04
Loss = 1.0557e-03, PNorm = 155.2016, GNorm = 0.0638, lr_0 = 1.0738e-04
Loss = 1.2552e-03, PNorm = 155.2025, GNorm = 0.1170, lr_0 = 1.0731e-04
Loss = 4.7608e-04, PNorm = 155.2044, GNorm = 0.0962, lr_0 = 1.0723e-04
Loss = 1.1336e-03, PNorm = 155.2060, GNorm = 0.1498, lr_0 = 1.0716e-04
Loss = 1.5617e-03, PNorm = 155.2069, GNorm = 0.0202, lr_0 = 1.0709e-04
Loss = 5.4121e-03, PNorm = 155.2048, GNorm = 0.1233, lr_0 = 1.0701e-04
Loss = 1.5324e-03, PNorm = 155.2066, GNorm = 0.1560, lr_0 = 1.0694e-04
Loss = 1.1619e-03, PNorm = 155.2088, GNorm = 0.1153, lr_0 = 1.0687e-04
Loss = 5.8655e-04, PNorm = 155.2118, GNorm = 0.0634, lr_0 = 1.0679e-04
Loss = 1.8546e-03, PNorm = 155.2135, GNorm = 0.2375, lr_0 = 1.0672e-04
Loss = 1.4975e-03, PNorm = 155.2152, GNorm = 0.1106, lr_0 = 1.0665e-04
Loss = 4.9074e-04, PNorm = 155.2167, GNorm = 0.0424, lr_0 = 1.0657e-04
Loss = 6.8260e-04, PNorm = 155.2179, GNorm = 0.1171, lr_0 = 1.0650e-04
Loss = 6.9702e-04, PNorm = 155.2209, GNorm = 0.1580, lr_0 = 1.0643e-04
Loss = 1.0757e-03, PNorm = 155.2219, GNorm = 0.1514, lr_0 = 1.0635e-04
Loss = 1.0501e-03, PNorm = 155.2235, GNorm = 0.0464, lr_0 = 1.0628e-04
Loss = 1.1442e-03, PNorm = 155.2233, GNorm = 0.0208, lr_0 = 1.0621e-04
Loss = 1.3125e-03, PNorm = 155.2234, GNorm = 0.0406, lr_0 = 1.0614e-04
Loss = 7.0457e-04, PNorm = 155.2241, GNorm = 0.0400, lr_0 = 1.0606e-04
Loss = 1.1744e-03, PNorm = 155.2248, GNorm = 0.2636, lr_0 = 1.0599e-04
Loss = 4.1411e-04, PNorm = 155.2247, GNorm = 0.0384, lr_0 = 1.0592e-04
Loss = 1.0460e-03, PNorm = 155.2257, GNorm = 0.1070, lr_0 = 1.0585e-04
Loss = 1.0276e-03, PNorm = 155.2271, GNorm = 0.0988, lr_0 = 1.0577e-04
Loss = 1.1644e-03, PNorm = 155.2284, GNorm = 0.1651, lr_0 = 1.0570e-04
Loss = 1.2123e-03, PNorm = 155.2298, GNorm = 0.1587, lr_0 = 1.0563e-04
Loss = 3.9192e-04, PNorm = 155.2311, GNorm = 0.0210, lr_0 = 1.0556e-04
Loss = 1.4111e-03, PNorm = 155.2313, GNorm = 0.0545, lr_0 = 1.0548e-04
Loss = 1.0583e-03, PNorm = 155.2314, GNorm = 0.1049, lr_0 = 1.0541e-04
Loss = 3.9230e-04, PNorm = 155.2304, GNorm = 0.0628, lr_0 = 1.0534e-04
Loss = 1.1307e-03, PNorm = 155.2316, GNorm = 0.0433, lr_0 = 1.0527e-04
Loss = 1.0839e-03, PNorm = 155.2309, GNorm = 0.0685, lr_0 = 1.0519e-04
Loss = 8.4946e-04, PNorm = 155.2304, GNorm = 0.1242, lr_0 = 1.0512e-04
Loss = 5.6606e-04, PNorm = 155.2315, GNorm = 0.0442, lr_0 = 1.0505e-04
Loss = 6.7501e-04, PNorm = 155.2334, GNorm = 0.0466, lr_0 = 1.0498e-04
Loss = 4.8504e-04, PNorm = 155.2349, GNorm = 0.0733, lr_0 = 1.0491e-04
Loss = 7.2120e-04, PNorm = 155.2357, GNorm = 0.0489, lr_0 = 1.0483e-04
Loss = 2.1905e-03, PNorm = 155.2363, GNorm = 0.0791, lr_0 = 1.0476e-04
Loss = 1.0330e-03, PNorm = 155.2368, GNorm = 0.0795, lr_0 = 1.0469e-04
Loss = 3.8297e-04, PNorm = 155.2377, GNorm = 0.1006, lr_0 = 1.0462e-04
Loss = 4.7817e-04, PNorm = 155.2381, GNorm = 0.0954, lr_0 = 1.0455e-04
Loss = 6.1415e-04, PNorm = 155.2406, GNorm = 0.0367, lr_0 = 1.0448e-04
Loss = 1.3284e-03, PNorm = 155.2421, GNorm = 0.0694, lr_0 = 1.0440e-04
Loss = 5.0348e-04, PNorm = 155.2430, GNorm = 0.0335, lr_0 = 1.0433e-04
Loss = 1.5181e-03, PNorm = 155.2443, GNorm = 0.0228, lr_0 = 1.0426e-04
Loss = 4.3823e-04, PNorm = 155.2445, GNorm = 0.0716, lr_0 = 1.0419e-04
Loss = 4.2728e-04, PNorm = 155.2455, GNorm = 0.0581, lr_0 = 1.0412e-04
Loss = 8.9456e-04, PNorm = 155.2460, GNorm = 0.1200, lr_0 = 1.0405e-04
Loss = 4.8237e-04, PNorm = 155.2466, GNorm = 0.0570, lr_0 = 1.0398e-04
Loss = 4.1073e-04, PNorm = 155.2472, GNorm = 0.0927, lr_0 = 1.0391e-04
Loss = 1.3132e-03, PNorm = 155.2488, GNorm = 0.0479, lr_0 = 1.0383e-04
Loss = 9.7969e-04, PNorm = 155.2505, GNorm = 0.0430, lr_0 = 1.0376e-04
Loss = 5.5252e-04, PNorm = 155.2517, GNorm = 0.0426, lr_0 = 1.0369e-04
Loss = 4.5325e-04, PNorm = 155.2531, GNorm = 0.0226, lr_0 = 1.0362e-04
Loss = 5.9678e-04, PNorm = 155.2540, GNorm = 0.0325, lr_0 = 1.0355e-04
Loss = 4.1566e-04, PNorm = 155.2541, GNorm = 0.0753, lr_0 = 1.0348e-04
Loss = 8.9493e-04, PNorm = 155.2529, GNorm = 0.0798, lr_0 = 1.0341e-04
Loss = 1.9747e-03, PNorm = 155.2541, GNorm = 0.2000, lr_0 = 1.0334e-04
Loss = 1.0916e-03, PNorm = 155.2548, GNorm = 0.0689, lr_0 = 1.0327e-04
Loss = 2.8261e-03, PNorm = 155.2557, GNorm = 0.1078, lr_0 = 1.0320e-04
Loss = 1.2333e-03, PNorm = 155.2566, GNorm = 0.1170, lr_0 = 1.0312e-04
Loss = 4.7650e-04, PNorm = 155.2574, GNorm = 0.0747, lr_0 = 1.0305e-04
Loss = 2.9892e-03, PNorm = 155.2597, GNorm = 0.0673, lr_0 = 1.0298e-04
Loss = 9.9635e-04, PNorm = 155.2612, GNorm = 0.0795, lr_0 = 1.0291e-04
Loss = 2.7571e-03, PNorm = 155.2632, GNorm = 0.1350, lr_0 = 1.0284e-04
Loss = 1.3679e-03, PNorm = 155.2642, GNorm = 0.0953, lr_0 = 1.0277e-04
Loss = 1.4843e-03, PNorm = 155.2660, GNorm = 0.0961, lr_0 = 1.0270e-04
Loss = 7.0464e-04, PNorm = 155.2687, GNorm = 0.0965, lr_0 = 1.0263e-04
Loss = 9.3713e-04, PNorm = 155.2684, GNorm = 0.1209, lr_0 = 1.0256e-04
Loss = 1.0775e-03, PNorm = 155.2695, GNorm = 0.0621, lr_0 = 1.0249e-04
Loss = 9.9748e-04, PNorm = 155.2711, GNorm = 0.1306, lr_0 = 1.0242e-04
Loss = 1.5250e-03, PNorm = 155.2723, GNorm = 0.0595, lr_0 = 1.0235e-04
Loss = 5.7600e-04, PNorm = 155.2729, GNorm = 0.1706, lr_0 = 1.0228e-04
Loss = 4.1121e-04, PNorm = 155.2747, GNorm = 0.0527, lr_0 = 1.0221e-04
Loss = 1.1564e-03, PNorm = 155.2761, GNorm = 0.3133, lr_0 = 1.0214e-04
Loss = 4.0602e-04, PNorm = 155.2768, GNorm = 0.0791, lr_0 = 1.0207e-04
Loss = 4.2316e-04, PNorm = 155.2778, GNorm = 0.0580, lr_0 = 1.0200e-04
Loss = 7.7319e-04, PNorm = 155.2783, GNorm = 0.0566, lr_0 = 1.0193e-04
Loss = 4.8340e-04, PNorm = 155.2794, GNorm = 0.0222, lr_0 = 1.0186e-04
Loss = 2.3315e-03, PNorm = 155.2806, GNorm = 0.1330, lr_0 = 1.0179e-04
Loss = 9.1121e-04, PNorm = 155.2834, GNorm = 0.0650, lr_0 = 1.0172e-04
Loss = 1.3192e-03, PNorm = 155.2835, GNorm = 0.2834, lr_0 = 1.0165e-04
Loss = 1.3393e-03, PNorm = 155.2848, GNorm = 0.0978, lr_0 = 1.0158e-04
Loss = 5.9544e-04, PNorm = 155.2840, GNorm = 0.0782, lr_0 = 1.0151e-04
Loss = 4.5114e-04, PNorm = 155.2849, GNorm = 0.0763, lr_0 = 1.0144e-04
Loss = 5.4503e-03, PNorm = 155.2861, GNorm = 0.0825, lr_0 = 1.0137e-04
Loss = 4.1618e-04, PNorm = 155.2868, GNorm = 0.0561, lr_0 = 1.0130e-04
Loss = 1.0152e-03, PNorm = 155.2880, GNorm = 0.0495, lr_0 = 1.0123e-04
Loss = 1.0372e-03, PNorm = 155.2898, GNorm = 0.0329, lr_0 = 1.0116e-04
Loss = 2.3910e-03, PNorm = 155.2912, GNorm = 0.0332, lr_0 = 1.0110e-04
Loss = 5.2338e-04, PNorm = 155.2923, GNorm = 0.0667, lr_0 = 1.0103e-04
Loss = 4.1030e-04, PNorm = 155.2924, GNorm = 0.0865, lr_0 = 1.0096e-04
Loss = 4.2271e-04, PNorm = 155.2928, GNorm = 0.0941, lr_0 = 1.0089e-04
Loss = 4.6506e-04, PNorm = 155.2916, GNorm = 0.1235, lr_0 = 1.0082e-04
Loss = 8.6748e-04, PNorm = 155.2936, GNorm = 0.0810, lr_0 = 1.0075e-04
Loss = 3.8511e-04, PNorm = 155.2959, GNorm = 0.0757, lr_0 = 1.0068e-04
Loss = 1.0146e-03, PNorm = 155.2974, GNorm = 0.0857, lr_0 = 1.0061e-04
Loss = 2.0219e-03, PNorm = 155.2991, GNorm = 0.0370, lr_0 = 1.0054e-04
Loss = 2.3570e-03, PNorm = 155.2989, GNorm = 0.0632, lr_0 = 1.0047e-04
Loss = 1.8841e-03, PNorm = 155.2999, GNorm = 0.0333, lr_0 = 1.0041e-04
Loss = 2.7100e-03, PNorm = 155.3002, GNorm = 0.0353, lr_0 = 1.0034e-04
Loss = 1.2634e-03, PNorm = 155.3018, GNorm = 0.0335, lr_0 = 1.0027e-04
Loss = 1.5409e-03, PNorm = 155.3033, GNorm = 0.0792, lr_0 = 1.0020e-04
Loss = 1.6194e-03, PNorm = 155.3059, GNorm = 0.0502, lr_0 = 1.0013e-04
Loss = 6.7482e-04, PNorm = 155.3076, GNorm = 0.0321, lr_0 = 1.0006e-04
Loss = 4.5263e-04, PNorm = 155.3084, GNorm = 0.0262, lr_0 = 1.0000e-04
Validation mae = 0.475475
Model 0 best validation mae = 0.474771 on epoch 26
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.454582
Ensemble test mae = 0.454582
Fold 2
Splitting data with seed 2
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 9.8439e-01, PNorm = 62.2441, GNorm = 2.0068, lr_0 = 1.0413e-04
Loss = 7.4617e-01, PNorm = 62.2557, GNorm = 2.5107, lr_0 = 1.0788e-04
Loss = 6.5366e-01, PNorm = 62.2682, GNorm = 2.3399, lr_0 = 1.1163e-04
Loss = 5.3437e-01, PNorm = 62.2797, GNorm = 2.0849, lr_0 = 1.1537e-04
Loss = 5.0642e-01, PNorm = 62.2895, GNorm = 2.8711, lr_0 = 1.1913e-04
Loss = 4.1307e-01, PNorm = 62.2979, GNorm = 1.7920, lr_0 = 1.2287e-04
Loss = 4.0089e-01, PNorm = 62.3067, GNorm = 1.8311, lr_0 = 1.2663e-04
Loss = 3.8156e-01, PNorm = 62.3157, GNorm = 1.8991, lr_0 = 1.3038e-04
Loss = 3.5192e-01, PNorm = 62.3252, GNorm = 1.5160, lr_0 = 1.3413e-04
Loss = 3.8119e-01, PNorm = 62.3347, GNorm = 2.8545, lr_0 = 1.3788e-04
Loss = 4.1504e-01, PNorm = 62.3445, GNorm = 2.0273, lr_0 = 1.4163e-04
Loss = 3.5812e-01, PNorm = 62.3550, GNorm = 3.4839, lr_0 = 1.4537e-04
Loss = 4.2102e-01, PNorm = 62.3658, GNorm = 3.3713, lr_0 = 1.4913e-04
Loss = 3.3048e-01, PNorm = 62.3776, GNorm = 2.3107, lr_0 = 1.5288e-04
Loss = 3.4182e-01, PNorm = 62.3883, GNorm = 2.7106, lr_0 = 1.5662e-04
Loss = 3.1274e-01, PNorm = 62.3968, GNorm = 1.8310, lr_0 = 1.6038e-04
Loss = 3.1529e-01, PNorm = 62.4065, GNorm = 3.2996, lr_0 = 1.6412e-04
Loss = 3.4462e-01, PNorm = 62.4167, GNorm = 2.3346, lr_0 = 1.6788e-04
Loss = 3.2750e-01, PNorm = 62.4269, GNorm = 1.6033, lr_0 = 1.7163e-04
Loss = 2.9808e-01, PNorm = 62.4362, GNorm = 1.5233, lr_0 = 1.7538e-04
Loss = 2.7134e-01, PNorm = 62.4475, GNorm = 1.8926, lr_0 = 1.7913e-04
Loss = 3.5697e-01, PNorm = 62.4599, GNorm = 2.5888, lr_0 = 1.8288e-04
Loss = 2.7278e-01, PNorm = 62.4729, GNorm = 1.9596, lr_0 = 1.8662e-04
Loss = 2.8089e-01, PNorm = 62.4846, GNorm = 1.4596, lr_0 = 1.9038e-04
Loss = 3.1161e-01, PNorm = 62.4960, GNorm = 1.4739, lr_0 = 1.9413e-04
Loss = 3.1269e-01, PNorm = 62.5096, GNorm = 1.4409, lr_0 = 1.9788e-04
Loss = 3.0073e-01, PNorm = 62.5222, GNorm = 1.6477, lr_0 = 2.0163e-04
Loss = 2.7758e-01, PNorm = 62.5361, GNorm = 1.4937, lr_0 = 2.0537e-04
Loss = 3.0064e-01, PNorm = 62.5503, GNorm = 2.9418, lr_0 = 2.0913e-04
Loss = 3.0907e-01, PNorm = 62.5682, GNorm = 2.3097, lr_0 = 2.1288e-04
Loss = 3.0792e-01, PNorm = 62.5884, GNorm = 1.6225, lr_0 = 2.1663e-04
Loss = 2.6143e-01, PNorm = 62.6039, GNorm = 2.3868, lr_0 = 2.2038e-04
Loss = 2.9800e-01, PNorm = 62.6218, GNorm = 1.6809, lr_0 = 2.2412e-04
Loss = 2.6506e-01, PNorm = 62.6382, GNorm = 1.5988, lr_0 = 2.2787e-04
Loss = 2.6444e-01, PNorm = 62.6544, GNorm = 1.5057, lr_0 = 2.3163e-04
Loss = 2.6492e-01, PNorm = 62.6714, GNorm = 1.4746, lr_0 = 2.3538e-04
Loss = 2.5792e-01, PNorm = 62.6842, GNorm = 1.4985, lr_0 = 2.3913e-04
Loss = 2.6191e-01, PNorm = 62.7014, GNorm = 2.0184, lr_0 = 2.4288e-04
Loss = 2.6820e-01, PNorm = 62.7197, GNorm = 1.6046, lr_0 = 2.4662e-04
Loss = 2.5273e-01, PNorm = 62.7362, GNorm = 1.3638, lr_0 = 2.5038e-04
Loss = 2.6390e-01, PNorm = 62.7564, GNorm = 1.3158, lr_0 = 2.5413e-04
Loss = 2.5817e-01, PNorm = 62.7736, GNorm = 1.5205, lr_0 = 2.5788e-04
Loss = 2.6454e-01, PNorm = 62.7909, GNorm = 1.0483, lr_0 = 2.6163e-04
Loss = 2.4340e-01, PNorm = 62.8118, GNorm = 1.1013, lr_0 = 2.6537e-04
Loss = 2.5870e-01, PNorm = 62.8308, GNorm = 1.6315, lr_0 = 2.6912e-04
Loss = 2.3951e-01, PNorm = 62.8482, GNorm = 1.3028, lr_0 = 2.7288e-04
Loss = 2.8575e-01, PNorm = 62.8695, GNorm = 1.3658, lr_0 = 2.7663e-04
Loss = 2.9371e-01, PNorm = 62.8918, GNorm = 1.3386, lr_0 = 2.8038e-04
Loss = 2.6346e-01, PNorm = 62.9142, GNorm = 1.1423, lr_0 = 2.8413e-04
Loss = 2.5709e-01, PNorm = 62.9364, GNorm = 0.9880, lr_0 = 2.8787e-04
Loss = 2.5600e-01, PNorm = 62.9628, GNorm = 1.3925, lr_0 = 2.9163e-04
Loss = 2.1574e-01, PNorm = 62.9848, GNorm = 0.9086, lr_0 = 2.9538e-04
Loss = 2.5168e-01, PNorm = 63.0073, GNorm = 1.7837, lr_0 = 2.9913e-04
Loss = 2.7774e-01, PNorm = 63.0283, GNorm = 1.2143, lr_0 = 3.0288e-04
Loss = 2.7623e-01, PNorm = 63.0552, GNorm = 1.3465, lr_0 = 3.0662e-04
Loss = 2.5611e-01, PNorm = 63.0805, GNorm = 1.9271, lr_0 = 3.1037e-04
Loss = 2.6284e-01, PNorm = 63.1038, GNorm = 1.3792, lr_0 = 3.1413e-04
Loss = 2.2498e-01, PNorm = 63.1326, GNorm = 1.6232, lr_0 = 3.1788e-04
Loss = 2.5893e-01, PNorm = 63.1565, GNorm = 1.1133, lr_0 = 3.2163e-04
Loss = 2.5390e-01, PNorm = 63.1855, GNorm = 0.9742, lr_0 = 3.2538e-04
Loss = 2.3472e-01, PNorm = 63.2122, GNorm = 1.0480, lr_0 = 3.2912e-04
Loss = 2.5827e-01, PNorm = 63.2373, GNorm = 1.1776, lr_0 = 3.3288e-04
Loss = 2.4862e-01, PNorm = 63.2667, GNorm = 1.2042, lr_0 = 3.3663e-04
Loss = 2.2998e-01, PNorm = 63.2914, GNorm = 1.3641, lr_0 = 3.4038e-04
Loss = 2.7037e-01, PNorm = 63.3200, GNorm = 1.2454, lr_0 = 3.4413e-04
Loss = 2.6650e-01, PNorm = 63.3495, GNorm = 1.4023, lr_0 = 3.4787e-04
Loss = 2.3636e-01, PNorm = 63.3782, GNorm = 1.3493, lr_0 = 3.5162e-04
Loss = 2.0536e-01, PNorm = 63.4103, GNorm = 1.0743, lr_0 = 3.5538e-04
Loss = 2.5033e-01, PNorm = 63.4403, GNorm = 1.2418, lr_0 = 3.5913e-04
Loss = 2.4052e-01, PNorm = 63.4719, GNorm = 1.0190, lr_0 = 3.6288e-04
Loss = 2.3211e-01, PNorm = 63.5024, GNorm = 0.8920, lr_0 = 3.6662e-04
Loss = 2.4836e-01, PNorm = 63.5350, GNorm = 1.4751, lr_0 = 3.7037e-04
Loss = 2.8079e-01, PNorm = 63.5697, GNorm = 1.3124, lr_0 = 3.7413e-04
Loss = 2.6179e-01, PNorm = 63.6088, GNorm = 1.3665, lr_0 = 3.7788e-04
Loss = 2.5015e-01, PNorm = 63.6470, GNorm = 1.5217, lr_0 = 3.8163e-04
Loss = 2.7531e-01, PNorm = 63.6831, GNorm = 1.0310, lr_0 = 3.8537e-04
Loss = 2.0891e-01, PNorm = 63.7233, GNorm = 1.4354, lr_0 = 3.8912e-04
Loss = 2.4877e-01, PNorm = 63.7617, GNorm = 1.1544, lr_0 = 3.9287e-04
Loss = 2.3622e-01, PNorm = 63.7976, GNorm = 1.0356, lr_0 = 3.9663e-04
Loss = 2.1891e-01, PNorm = 63.8327, GNorm = 1.2402, lr_0 = 4.0038e-04
Loss = 2.4569e-01, PNorm = 63.8635, GNorm = 0.8929, lr_0 = 4.0413e-04
Loss = 2.2832e-01, PNorm = 63.9063, GNorm = 0.9554, lr_0 = 4.0787e-04
Loss = 2.4692e-01, PNorm = 63.9384, GNorm = 0.9992, lr_0 = 4.1162e-04
Loss = 2.5774e-01, PNorm = 63.9814, GNorm = 1.5331, lr_0 = 4.1537e-04
Loss = 2.2785e-01, PNorm = 64.0207, GNorm = 1.6569, lr_0 = 4.1913e-04
Loss = 2.6357e-01, PNorm = 64.0629, GNorm = 1.3391, lr_0 = 4.2288e-04
Loss = 2.1873e-01, PNorm = 64.1015, GNorm = 0.8455, lr_0 = 4.2662e-04
Loss = 2.3479e-01, PNorm = 64.1418, GNorm = 0.9341, lr_0 = 4.3037e-04
Loss = 2.1553e-01, PNorm = 64.1787, GNorm = 1.2477, lr_0 = 4.3412e-04
Loss = 2.0104e-01, PNorm = 64.2123, GNorm = 0.8926, lr_0 = 4.3788e-04
Loss = 2.4475e-01, PNorm = 64.2510, GNorm = 1.7386, lr_0 = 4.4163e-04
Loss = 2.4133e-01, PNorm = 64.2938, GNorm = 0.8615, lr_0 = 4.4538e-04
Loss = 2.3376e-01, PNorm = 64.3369, GNorm = 1.7309, lr_0 = 4.4912e-04
Loss = 2.3414e-01, PNorm = 64.3771, GNorm = 0.9427, lr_0 = 4.5287e-04
Loss = 2.5778e-01, PNorm = 64.4227, GNorm = 1.1976, lr_0 = 4.5662e-04
Loss = 2.3549e-01, PNorm = 64.4680, GNorm = 1.6623, lr_0 = 4.6038e-04
Loss = 2.3789e-01, PNorm = 64.5148, GNorm = 1.0779, lr_0 = 4.6413e-04
Loss = 2.0781e-01, PNorm = 64.5674, GNorm = 0.9886, lr_0 = 4.6787e-04
Loss = 2.5712e-01, PNorm = 64.6100, GNorm = 1.2441, lr_0 = 4.7162e-04
Loss = 1.8893e-01, PNorm = 64.6554, GNorm = 1.1178, lr_0 = 4.7537e-04
Loss = 2.5886e-01, PNorm = 64.7039, GNorm = 1.1233, lr_0 = 4.7913e-04
Loss = 2.1856e-01, PNorm = 64.7584, GNorm = 1.0328, lr_0 = 4.8288e-04
Loss = 1.9158e-01, PNorm = 64.8050, GNorm = 0.7475, lr_0 = 4.8663e-04
Loss = 2.2425e-01, PNorm = 64.8510, GNorm = 1.0184, lr_0 = 4.9038e-04
Loss = 1.8461e-01, PNorm = 64.9003, GNorm = 0.9322, lr_0 = 4.9412e-04
Loss = 2.0934e-01, PNorm = 64.9403, GNorm = 1.0137, lr_0 = 4.9788e-04
Loss = 2.2944e-01, PNorm = 64.9900, GNorm = 1.0502, lr_0 = 5.0163e-04
Loss = 2.2535e-01, PNorm = 65.0429, GNorm = 0.7355, lr_0 = 5.0538e-04
Loss = 1.9115e-01, PNorm = 65.0924, GNorm = 0.8901, lr_0 = 5.0913e-04
Loss = 2.2392e-01, PNorm = 65.1387, GNorm = 0.7900, lr_0 = 5.1287e-04
Loss = 2.2868e-01, PNorm = 65.1958, GNorm = 0.6890, lr_0 = 5.1663e-04
Loss = 2.3506e-01, PNorm = 65.2510, GNorm = 1.3946, lr_0 = 5.2038e-04
Loss = 2.3046e-01, PNorm = 65.3010, GNorm = 1.1379, lr_0 = 5.2413e-04
Loss = 2.5229e-01, PNorm = 65.3544, GNorm = 0.9046, lr_0 = 5.2788e-04
Loss = 2.3101e-01, PNorm = 65.4093, GNorm = 0.7981, lr_0 = 5.3162e-04
Loss = 2.1845e-01, PNorm = 65.4679, GNorm = 0.9049, lr_0 = 5.3538e-04
Loss = 1.8419e-01, PNorm = 65.5257, GNorm = 1.1285, lr_0 = 5.3912e-04
Loss = 2.3804e-01, PNorm = 65.5853, GNorm = 1.0068, lr_0 = 5.4288e-04
Loss = 2.0163e-01, PNorm = 65.6481, GNorm = 1.7360, lr_0 = 5.4663e-04
Loss = 1.9738e-01, PNorm = 65.7032, GNorm = 0.6013, lr_0 = 5.5038e-04
Validation mae = 0.561122
Epoch 1
Loss = 1.4750e-01, PNorm = 65.7628, GNorm = 0.8508, lr_0 = 5.5413e-04
Loss = 1.5082e-01, PNorm = 65.8105, GNorm = 0.7101, lr_0 = 5.5787e-04
Loss = 1.6334e-01, PNorm = 65.8572, GNorm = 0.7244, lr_0 = 5.6163e-04
Loss = 1.5843e-01, PNorm = 65.9088, GNorm = 0.6748, lr_0 = 5.6538e-04
Loss = 1.3439e-01, PNorm = 65.9527, GNorm = 0.7457, lr_0 = 5.6913e-04
Loss = 1.6423e-01, PNorm = 66.0031, GNorm = 0.6633, lr_0 = 5.7288e-04
Loss = 1.4797e-01, PNorm = 66.0529, GNorm = 0.6833, lr_0 = 5.7662e-04
Loss = 1.3655e-01, PNorm = 66.1059, GNorm = 0.7459, lr_0 = 5.8038e-04
Loss = 1.3446e-01, PNorm = 66.1571, GNorm = 0.8384, lr_0 = 5.8413e-04
Loss = 1.4549e-01, PNorm = 66.2215, GNorm = 0.9664, lr_0 = 5.8788e-04
Loss = 1.4627e-01, PNorm = 66.2765, GNorm = 0.8670, lr_0 = 5.9163e-04
Loss = 1.6439e-01, PNorm = 66.3402, GNorm = 0.7431, lr_0 = 5.9538e-04
Loss = 1.4531e-01, PNorm = 66.4045, GNorm = 0.9252, lr_0 = 5.9913e-04
Loss = 1.4229e-01, PNorm = 66.4620, GNorm = 0.8150, lr_0 = 6.0288e-04
Loss = 1.1719e-01, PNorm = 66.5186, GNorm = 0.9282, lr_0 = 6.0663e-04
Loss = 1.3702e-01, PNorm = 66.5693, GNorm = 0.9816, lr_0 = 6.1038e-04
Loss = 1.3932e-01, PNorm = 66.6267, GNorm = 0.9764, lr_0 = 6.1413e-04
Loss = 1.6169e-01, PNorm = 66.6861, GNorm = 0.8194, lr_0 = 6.1788e-04
Loss = 1.5419e-01, PNorm = 66.7552, GNorm = 0.8394, lr_0 = 6.2163e-04
Loss = 1.4256e-01, PNorm = 66.8246, GNorm = 0.9519, lr_0 = 6.2538e-04
Loss = 1.5952e-01, PNorm = 66.8845, GNorm = 0.7496, lr_0 = 6.2913e-04
Loss = 1.5342e-01, PNorm = 66.9534, GNorm = 0.8242, lr_0 = 6.3288e-04
Loss = 1.4757e-01, PNorm = 67.0158, GNorm = 0.8576, lr_0 = 6.3663e-04
Loss = 1.4193e-01, PNorm = 67.0816, GNorm = 1.0136, lr_0 = 6.4038e-04
Loss = 1.3497e-01, PNorm = 67.1524, GNorm = 0.7894, lr_0 = 6.4413e-04
Loss = 1.8816e-01, PNorm = 67.2193, GNorm = 1.0412, lr_0 = 6.4788e-04
Loss = 1.6277e-01, PNorm = 67.3024, GNorm = 0.7880, lr_0 = 6.5163e-04
Loss = 1.4848e-01, PNorm = 67.3917, GNorm = 0.8112, lr_0 = 6.5538e-04
Loss = 1.4175e-01, PNorm = 67.4748, GNorm = 0.7497, lr_0 = 6.5913e-04
Loss = 1.7270e-01, PNorm = 67.5512, GNorm = 1.0028, lr_0 = 6.6288e-04
Loss = 1.3805e-01, PNorm = 67.6357, GNorm = 0.7924, lr_0 = 6.6663e-04
Loss = 1.4859e-01, PNorm = 67.7078, GNorm = 0.6988, lr_0 = 6.7038e-04
Loss = 1.5687e-01, PNorm = 67.7871, GNorm = 1.1515, lr_0 = 6.7413e-04
Loss = 1.6449e-01, PNorm = 67.8639, GNorm = 1.0200, lr_0 = 6.7788e-04
Loss = 1.9209e-01, PNorm = 67.9530, GNorm = 0.9847, lr_0 = 6.8163e-04
Loss = 1.8175e-01, PNorm = 68.0465, GNorm = 1.1743, lr_0 = 6.8538e-04
Loss = 1.6476e-01, PNorm = 68.1356, GNorm = 1.3337, lr_0 = 6.8913e-04
Loss = 1.6290e-01, PNorm = 68.2343, GNorm = 1.0618, lr_0 = 6.9288e-04
Loss = 1.8964e-01, PNorm = 68.3203, GNorm = 1.0550, lr_0 = 6.9663e-04
Loss = 1.6901e-01, PNorm = 68.4103, GNorm = 0.8905, lr_0 = 7.0038e-04
Loss = 1.6445e-01, PNorm = 68.4965, GNorm = 0.9163, lr_0 = 7.0413e-04
Loss = 1.7795e-01, PNorm = 68.5861, GNorm = 1.1621, lr_0 = 7.0788e-04
Loss = 1.4829e-01, PNorm = 68.6692, GNorm = 0.7164, lr_0 = 7.1163e-04
Loss = 1.5595e-01, PNorm = 68.7638, GNorm = 0.7084, lr_0 = 7.1538e-04
Loss = 1.5175e-01, PNorm = 68.8442, GNorm = 1.0992, lr_0 = 7.1913e-04
Loss = 1.6932e-01, PNorm = 68.9357, GNorm = 0.6039, lr_0 = 7.2288e-04
Loss = 1.5872e-01, PNorm = 69.0265, GNorm = 0.7296, lr_0 = 7.2663e-04
Loss = 1.4541e-01, PNorm = 69.1188, GNorm = 0.6420, lr_0 = 7.3038e-04
Loss = 1.6895e-01, PNorm = 69.2120, GNorm = 0.7897, lr_0 = 7.3413e-04
Loss = 1.6393e-01, PNorm = 69.3048, GNorm = 0.9180, lr_0 = 7.3788e-04
Loss = 1.6975e-01, PNorm = 69.3982, GNorm = 0.6870, lr_0 = 7.4163e-04
Loss = 1.4185e-01, PNorm = 69.4901, GNorm = 0.6431, lr_0 = 7.4538e-04
Loss = 1.5333e-01, PNorm = 69.5776, GNorm = 0.6926, lr_0 = 7.4913e-04
Loss = 1.5983e-01, PNorm = 69.6677, GNorm = 0.9247, lr_0 = 7.5288e-04
Loss = 1.7083e-01, PNorm = 69.7568, GNorm = 0.7566, lr_0 = 7.5663e-04
Loss = 1.5335e-01, PNorm = 69.8545, GNorm = 0.8087, lr_0 = 7.6038e-04
Loss = 1.4861e-01, PNorm = 69.9468, GNorm = 0.6256, lr_0 = 7.6413e-04
Loss = 1.8688e-01, PNorm = 70.0457, GNorm = 0.7589, lr_0 = 7.6788e-04
Loss = 1.6788e-01, PNorm = 70.1487, GNorm = 0.6791, lr_0 = 7.7163e-04
Loss = 1.6646e-01, PNorm = 70.2307, GNorm = 0.6589, lr_0 = 7.7538e-04
Loss = 1.7268e-01, PNorm = 70.3427, GNorm = 0.8267, lr_0 = 7.7913e-04
Loss = 1.5865e-01, PNorm = 70.4434, GNorm = 0.8840, lr_0 = 7.8288e-04
Loss = 1.7790e-01, PNorm = 70.5616, GNorm = 0.6768, lr_0 = 7.8663e-04
Loss = 1.9478e-01, PNorm = 70.6730, GNorm = 1.0876, lr_0 = 7.9038e-04
Loss = 1.9183e-01, PNorm = 70.7910, GNorm = 1.2100, lr_0 = 7.9413e-04
Loss = 1.6886e-01, PNorm = 70.8968, GNorm = 0.8821, lr_0 = 7.9788e-04
Loss = 1.7054e-01, PNorm = 71.0035, GNorm = 0.7336, lr_0 = 8.0163e-04
Loss = 1.7100e-01, PNorm = 71.1121, GNorm = 1.1783, lr_0 = 8.0538e-04
Loss = 1.7335e-01, PNorm = 71.2249, GNorm = 0.6517, lr_0 = 8.0913e-04
Loss = 1.7398e-01, PNorm = 71.3485, GNorm = 0.6930, lr_0 = 8.1288e-04
Loss = 1.8159e-01, PNorm = 71.4613, GNorm = 0.7638, lr_0 = 8.1663e-04
Loss = 1.7331e-01, PNorm = 71.5866, GNorm = 0.5994, lr_0 = 8.2038e-04
Loss = 1.8665e-01, PNorm = 71.6994, GNorm = 1.0490, lr_0 = 8.2413e-04
Loss = 1.5843e-01, PNorm = 71.8196, GNorm = 1.0100, lr_0 = 8.2788e-04
Loss = 1.6816e-01, PNorm = 71.9371, GNorm = 1.1642, lr_0 = 8.3163e-04
Loss = 1.6837e-01, PNorm = 72.0463, GNorm = 0.7137, lr_0 = 8.3538e-04
Loss = 1.7548e-01, PNorm = 72.1607, GNorm = 0.8461, lr_0 = 8.3913e-04
Loss = 1.8949e-01, PNorm = 72.2722, GNorm = 1.0248, lr_0 = 8.4288e-04
Loss = 1.8659e-01, PNorm = 72.3950, GNorm = 0.8664, lr_0 = 8.4663e-04
Loss = 1.8219e-01, PNorm = 72.5056, GNorm = 0.7890, lr_0 = 8.5038e-04
Loss = 1.7589e-01, PNorm = 72.6377, GNorm = 1.1519, lr_0 = 8.5413e-04
Loss = 1.5333e-01, PNorm = 72.7349, GNorm = 0.5052, lr_0 = 8.5788e-04
Loss = 1.7454e-01, PNorm = 72.8525, GNorm = 0.8151, lr_0 = 8.6163e-04
Loss = 1.9299e-01, PNorm = 72.9589, GNorm = 0.7303, lr_0 = 8.6538e-04
Loss = 1.4887e-01, PNorm = 73.0892, GNorm = 0.5712, lr_0 = 8.6913e-04
Loss = 1.8450e-01, PNorm = 73.2014, GNorm = 0.6547, lr_0 = 8.7288e-04
Loss = 1.7217e-01, PNorm = 73.3341, GNorm = 0.7717, lr_0 = 8.7663e-04
Loss = 1.7183e-01, PNorm = 73.4557, GNorm = 1.1106, lr_0 = 8.8038e-04
Loss = 1.8005e-01, PNorm = 73.5695, GNorm = 0.5181, lr_0 = 8.8413e-04
Loss = 1.7944e-01, PNorm = 73.7033, GNorm = 1.2862, lr_0 = 8.8788e-04
Loss = 2.0230e-01, PNorm = 73.8361, GNorm = 0.7660, lr_0 = 8.9163e-04
Loss = 1.9157e-01, PNorm = 73.9654, GNorm = 0.8819, lr_0 = 8.9538e-04
Loss = 1.6490e-01, PNorm = 74.1068, GNorm = 1.1572, lr_0 = 8.9913e-04
Loss = 1.6459e-01, PNorm = 74.2294, GNorm = 0.7939, lr_0 = 9.0288e-04
Loss = 1.6124e-01, PNorm = 74.3514, GNorm = 0.9844, lr_0 = 9.0663e-04
Loss = 1.8724e-01, PNorm = 74.4661, GNorm = 1.0802, lr_0 = 9.1038e-04
Loss = 1.6514e-01, PNorm = 74.6037, GNorm = 1.5590, lr_0 = 9.1413e-04
Loss = 1.7581e-01, PNorm = 74.7262, GNorm = 0.8541, lr_0 = 9.1788e-04
Loss = 1.7474e-01, PNorm = 74.8625, GNorm = 0.8116, lr_0 = 9.2163e-04
Loss = 1.5824e-01, PNorm = 74.9881, GNorm = 0.5978, lr_0 = 9.2538e-04
Loss = 1.7072e-01, PNorm = 75.1189, GNorm = 0.8040, lr_0 = 9.2913e-04
Loss = 1.9347e-01, PNorm = 75.2551, GNorm = 0.8433, lr_0 = 9.3288e-04
Loss = 1.8965e-01, PNorm = 75.3996, GNorm = 0.6458, lr_0 = 9.3663e-04
Loss = 1.8875e-01, PNorm = 75.5383, GNorm = 0.7811, lr_0 = 9.4038e-04
Loss = 1.7175e-01, PNorm = 75.6768, GNorm = 0.9343, lr_0 = 9.4413e-04
Loss = 1.6847e-01, PNorm = 75.8197, GNorm = 0.6466, lr_0 = 9.4788e-04
Loss = 1.8399e-01, PNorm = 75.9544, GNorm = 1.0065, lr_0 = 9.5163e-04
Loss = 1.5892e-01, PNorm = 76.0909, GNorm = 0.8859, lr_0 = 9.5538e-04
Loss = 1.8020e-01, PNorm = 76.2201, GNorm = 0.8087, lr_0 = 9.5913e-04
Loss = 1.9080e-01, PNorm = 76.3611, GNorm = 0.6764, lr_0 = 9.6288e-04
Loss = 1.5440e-01, PNorm = 76.5021, GNorm = 0.7706, lr_0 = 9.6663e-04
Loss = 1.6972e-01, PNorm = 76.6222, GNorm = 0.8198, lr_0 = 9.7038e-04
Loss = 1.7740e-01, PNorm = 76.7452, GNorm = 0.6942, lr_0 = 9.7413e-04
Loss = 1.7130e-01, PNorm = 76.8734, GNorm = 0.6411, lr_0 = 9.7788e-04
Loss = 1.4677e-01, PNorm = 76.9966, GNorm = 0.6488, lr_0 = 9.8163e-04
Loss = 1.9556e-01, PNorm = 77.1211, GNorm = 0.8052, lr_0 = 9.8537e-04
Loss = 1.9211e-01, PNorm = 77.2634, GNorm = 0.9243, lr_0 = 9.8912e-04
Loss = 1.5975e-01, PNorm = 77.4023, GNorm = 0.6740, lr_0 = 9.9288e-04
Loss = 1.8788e-01, PNorm = 77.5415, GNorm = 1.0800, lr_0 = 9.9663e-04
Loss = 2.0364e-01, PNorm = 77.6881, GNorm = 0.8123, lr_0 = 9.9993e-04
Validation mae = 0.536655
Epoch 2
Loss = 1.2059e-01, PNorm = 77.8272, GNorm = 0.8875, lr_0 = 9.9925e-04
Loss = 1.1843e-01, PNorm = 77.9378, GNorm = 0.6112, lr_0 = 9.9856e-04
Loss = 1.0217e-01, PNorm = 78.0480, GNorm = 0.6670, lr_0 = 9.9788e-04
Loss = 1.0795e-01, PNorm = 78.1573, GNorm = 0.6960, lr_0 = 9.9719e-04
Loss = 1.0372e-01, PNorm = 78.2480, GNorm = 0.5101, lr_0 = 9.9651e-04
Loss = 8.9803e-02, PNorm = 78.3403, GNorm = 0.5283, lr_0 = 9.9583e-04
Loss = 1.1398e-01, PNorm = 78.4369, GNorm = 0.8081, lr_0 = 9.9515e-04
Loss = 1.1328e-01, PNorm = 78.5437, GNorm = 0.5805, lr_0 = 9.9446e-04
Loss = 1.2110e-01, PNorm = 78.6473, GNorm = 0.9209, lr_0 = 9.9378e-04
Loss = 1.1103e-01, PNorm = 78.7577, GNorm = 0.5116, lr_0 = 9.9310e-04
Loss = 1.1793e-01, PNorm = 78.8625, GNorm = 0.6282, lr_0 = 9.9242e-04
Loss = 1.0470e-01, PNorm = 78.9621, GNorm = 0.5482, lr_0 = 9.9174e-04
Loss = 1.1523e-01, PNorm = 79.0535, GNorm = 0.7246, lr_0 = 9.9106e-04
Loss = 1.0017e-01, PNorm = 79.1514, GNorm = 0.4406, lr_0 = 9.9038e-04
Loss = 1.0025e-01, PNorm = 79.2416, GNorm = 0.5298, lr_0 = 9.8971e-04
Loss = 8.8180e-02, PNorm = 79.3349, GNorm = 0.9646, lr_0 = 9.8903e-04
Loss = 1.2010e-01, PNorm = 79.4214, GNorm = 0.4820, lr_0 = 9.8835e-04
Loss = 9.9421e-02, PNorm = 79.5207, GNorm = 0.5651, lr_0 = 9.8767e-04
Loss = 1.1502e-01, PNorm = 79.6219, GNorm = 0.7025, lr_0 = 9.8700e-04
Loss = 1.0679e-01, PNorm = 79.7253, GNorm = 0.8799, lr_0 = 9.8632e-04
Loss = 1.0802e-01, PNorm = 79.8258, GNorm = 0.6596, lr_0 = 9.8564e-04
Loss = 1.1243e-01, PNorm = 79.9359, GNorm = 0.7170, lr_0 = 9.8497e-04
Loss = 9.7357e-02, PNorm = 80.0332, GNorm = 0.5399, lr_0 = 9.8429e-04
Loss = 1.1514e-01, PNorm = 80.1383, GNorm = 0.8062, lr_0 = 9.8362e-04
Loss = 9.5050e-02, PNorm = 80.2396, GNorm = 0.6502, lr_0 = 9.8295e-04
Loss = 1.1427e-01, PNorm = 80.3545, GNorm = 0.5626, lr_0 = 9.8227e-04
Loss = 1.0862e-01, PNorm = 80.4455, GNorm = 0.5746, lr_0 = 9.8160e-04
Loss = 1.0991e-01, PNorm = 80.5533, GNorm = 0.6328, lr_0 = 9.8093e-04
Loss = 9.4262e-02, PNorm = 80.6383, GNorm = 0.4568, lr_0 = 9.8026e-04
Loss = 1.1567e-01, PNorm = 80.7416, GNorm = 0.6999, lr_0 = 9.7958e-04
Loss = 1.1170e-01, PNorm = 80.8444, GNorm = 0.5083, lr_0 = 9.7891e-04
Loss = 1.0368e-01, PNorm = 80.9422, GNorm = 0.8664, lr_0 = 9.7824e-04
Loss = 1.1656e-01, PNorm = 81.0373, GNorm = 0.7150, lr_0 = 9.7757e-04
Loss = 1.1325e-01, PNorm = 81.1510, GNorm = 0.4612, lr_0 = 9.7690e-04
Loss = 1.1077e-01, PNorm = 81.2571, GNorm = 0.5576, lr_0 = 9.7623e-04
Loss = 1.1527e-01, PNorm = 81.3714, GNorm = 0.8835, lr_0 = 9.7556e-04
Loss = 1.0686e-01, PNorm = 81.4717, GNorm = 0.4194, lr_0 = 9.7490e-04
Loss = 1.1332e-01, PNorm = 81.5800, GNorm = 0.7846, lr_0 = 9.7423e-04
Loss = 1.0715e-01, PNorm = 81.6805, GNorm = 0.7734, lr_0 = 9.7356e-04
Loss = 1.0735e-01, PNorm = 81.7788, GNorm = 0.6377, lr_0 = 9.7289e-04
Loss = 1.0503e-01, PNorm = 81.8819, GNorm = 0.9918, lr_0 = 9.7223e-04
Loss = 1.0017e-01, PNorm = 81.9767, GNorm = 0.5193, lr_0 = 9.7156e-04
Loss = 1.1950e-01, PNorm = 82.0831, GNorm = 0.7933, lr_0 = 9.7090e-04
Loss = 1.0060e-01, PNorm = 82.1859, GNorm = 0.4895, lr_0 = 9.7023e-04
Loss = 1.0279e-01, PNorm = 82.2867, GNorm = 0.4943, lr_0 = 9.6957e-04
Loss = 1.0854e-01, PNorm = 82.3919, GNorm = 0.6425, lr_0 = 9.6890e-04
Loss = 9.7938e-02, PNorm = 82.4927, GNorm = 0.5139, lr_0 = 9.6824e-04
Loss = 9.3266e-02, PNorm = 82.5851, GNorm = 0.4094, lr_0 = 9.6757e-04
Loss = 1.2264e-01, PNorm = 82.6806, GNorm = 1.0529, lr_0 = 9.6691e-04
Loss = 1.0443e-01, PNorm = 82.7891, GNorm = 0.6059, lr_0 = 9.6625e-04
Loss = 1.0430e-01, PNorm = 82.8926, GNorm = 0.5403, lr_0 = 9.6559e-04
Loss = 1.1668e-01, PNorm = 82.9999, GNorm = 0.6980, lr_0 = 9.6493e-04
Loss = 1.2985e-01, PNorm = 83.0970, GNorm = 1.3114, lr_0 = 9.6427e-04
Loss = 1.3008e-01, PNorm = 83.2206, GNorm = 1.9122, lr_0 = 9.6360e-04
Loss = 1.1664e-01, PNorm = 83.3199, GNorm = 0.8632, lr_0 = 9.6294e-04
Loss = 1.0576e-01, PNorm = 83.4350, GNorm = 0.7220, lr_0 = 9.6228e-04
Loss = 1.2389e-01, PNorm = 83.5321, GNorm = 0.7362, lr_0 = 9.6163e-04
Loss = 1.1516e-01, PNorm = 83.6440, GNorm = 0.7997, lr_0 = 9.6097e-04
Loss = 1.1848e-01, PNorm = 83.7483, GNorm = 0.4767, lr_0 = 9.6031e-04
Loss = 1.1295e-01, PNorm = 83.8576, GNorm = 0.7069, lr_0 = 9.5965e-04
Loss = 1.0862e-01, PNorm = 83.9664, GNorm = 1.4310, lr_0 = 9.5899e-04
Loss = 1.4804e-01, PNorm = 84.0786, GNorm = 0.7299, lr_0 = 9.5834e-04
Loss = 1.2477e-01, PNorm = 84.1867, GNorm = 0.6107, lr_0 = 9.5768e-04
Loss = 1.2984e-01, PNorm = 84.2995, GNorm = 0.8188, lr_0 = 9.5702e-04
Loss = 1.1022e-01, PNorm = 84.4315, GNorm = 0.9104, lr_0 = 9.5637e-04
Loss = 1.0378e-01, PNorm = 84.5534, GNorm = 0.5579, lr_0 = 9.5571e-04
Loss = 9.3641e-02, PNorm = 84.6647, GNorm = 0.3594, lr_0 = 9.5506e-04
Loss = 1.3411e-01, PNorm = 84.7696, GNorm = 0.5080, lr_0 = 9.5440e-04
Loss = 1.0657e-01, PNorm = 84.8837, GNorm = 0.8505, lr_0 = 9.5375e-04
Loss = 1.1714e-01, PNorm = 84.9883, GNorm = 0.9707, lr_0 = 9.5310e-04
Loss = 1.1360e-01, PNorm = 85.0950, GNorm = 0.4737, lr_0 = 9.5244e-04
Loss = 1.1401e-01, PNorm = 85.2057, GNorm = 0.6592, lr_0 = 9.5179e-04
Loss = 1.1707e-01, PNorm = 85.3041, GNorm = 0.7339, lr_0 = 9.5114e-04
Loss = 1.2329e-01, PNorm = 85.3995, GNorm = 0.7243, lr_0 = 9.5049e-04
Loss = 1.2407e-01, PNorm = 85.5115, GNorm = 0.8130, lr_0 = 9.4984e-04
Loss = 1.3236e-01, PNorm = 85.6220, GNorm = 1.0819, lr_0 = 9.4919e-04
Loss = 1.0639e-01, PNorm = 85.7398, GNorm = 0.6639, lr_0 = 9.4854e-04
Loss = 1.1724e-01, PNorm = 85.8489, GNorm = 0.5458, lr_0 = 9.4789e-04
Loss = 1.1558e-01, PNorm = 85.9571, GNorm = 0.5077, lr_0 = 9.4724e-04
Loss = 1.0942e-01, PNorm = 86.0622, GNorm = 0.5864, lr_0 = 9.4659e-04
Loss = 1.1537e-01, PNorm = 86.1624, GNorm = 0.6169, lr_0 = 9.4594e-04
Loss = 1.0444e-01, PNorm = 86.2728, GNorm = 0.5294, lr_0 = 9.4529e-04
Loss = 1.1581e-01, PNorm = 86.3693, GNorm = 0.8827, lr_0 = 9.4464e-04
Loss = 1.0253e-01, PNorm = 86.4568, GNorm = 0.4273, lr_0 = 9.4400e-04
Loss = 1.0339e-01, PNorm = 86.5644, GNorm = 1.3429, lr_0 = 9.4335e-04
Loss = 9.5692e-02, PNorm = 86.6599, GNorm = 1.0252, lr_0 = 9.4270e-04
Loss = 1.2187e-01, PNorm = 86.7643, GNorm = 0.8611, lr_0 = 9.4206e-04
Loss = 1.0338e-01, PNorm = 86.8545, GNorm = 0.7352, lr_0 = 9.4141e-04
Loss = 1.2510e-01, PNorm = 86.9673, GNorm = 0.9056, lr_0 = 9.4077e-04
Loss = 1.1790e-01, PNorm = 87.0682, GNorm = 0.5166, lr_0 = 9.4012e-04
Loss = 1.0986e-01, PNorm = 87.1719, GNorm = 0.4290, lr_0 = 9.3948e-04
Loss = 1.1046e-01, PNorm = 87.2696, GNorm = 0.7528, lr_0 = 9.3884e-04
Loss = 1.0766e-01, PNorm = 87.3756, GNorm = 0.6111, lr_0 = 9.3819e-04
Loss = 1.1363e-01, PNorm = 87.4835, GNorm = 0.7446, lr_0 = 9.3755e-04
Loss = 1.1820e-01, PNorm = 87.5898, GNorm = 0.6771, lr_0 = 9.3691e-04
Loss = 1.1310e-01, PNorm = 87.6891, GNorm = 0.4892, lr_0 = 9.3627e-04
Loss = 1.1984e-01, PNorm = 87.8050, GNorm = 0.9570, lr_0 = 9.3562e-04
Loss = 1.0380e-01, PNorm = 87.9135, GNorm = 0.3407, lr_0 = 9.3498e-04
Loss = 1.3782e-01, PNorm = 88.0203, GNorm = 0.5100, lr_0 = 9.3434e-04
Loss = 1.2499e-01, PNorm = 88.1293, GNorm = 0.7398, lr_0 = 9.3370e-04
Loss = 1.3221e-01, PNorm = 88.2314, GNorm = 0.7798, lr_0 = 9.3306e-04
Loss = 1.3625e-01, PNorm = 88.3564, GNorm = 0.6527, lr_0 = 9.3242e-04
Loss = 1.2502e-01, PNorm = 88.4863, GNorm = 1.0634, lr_0 = 9.3178e-04
Loss = 1.1110e-01, PNorm = 88.5924, GNorm = 0.7219, lr_0 = 9.3115e-04
Loss = 1.1370e-01, PNorm = 88.6990, GNorm = 0.8799, lr_0 = 9.3051e-04
Loss = 1.2720e-01, PNorm = 88.8038, GNorm = 1.0058, lr_0 = 9.2987e-04
Loss = 1.3200e-01, PNorm = 88.9036, GNorm = 1.2720, lr_0 = 9.2923e-04
Loss = 1.1612e-01, PNorm = 89.0181, GNorm = 0.6460, lr_0 = 9.2860e-04
Loss = 1.3091e-01, PNorm = 89.1245, GNorm = 0.9035, lr_0 = 9.2796e-04
Loss = 1.2505e-01, PNorm = 89.2363, GNorm = 0.4397, lr_0 = 9.2733e-04
Loss = 1.0296e-01, PNorm = 89.3553, GNorm = 0.5025, lr_0 = 9.2669e-04
Loss = 1.1647e-01, PNorm = 89.4584, GNorm = 1.1234, lr_0 = 9.2606e-04
Loss = 1.1475e-01, PNorm = 89.5657, GNorm = 0.9077, lr_0 = 9.2542e-04
Loss = 1.3381e-01, PNorm = 89.6683, GNorm = 0.6283, lr_0 = 9.2479e-04
Loss = 1.1923e-01, PNorm = 89.7788, GNorm = 0.6947, lr_0 = 9.2415e-04
Loss = 1.3524e-01, PNorm = 89.9012, GNorm = 0.8056, lr_0 = 9.2352e-04
Loss = 1.0140e-01, PNorm = 90.0180, GNorm = 0.6767, lr_0 = 9.2289e-04
Loss = 9.5460e-02, PNorm = 90.1227, GNorm = 0.4271, lr_0 = 9.2226e-04
Loss = 1.1975e-01, PNorm = 90.2076, GNorm = 0.6369, lr_0 = 9.2162e-04
Loss = 1.1698e-01, PNorm = 90.3204, GNorm = 0.6621, lr_0 = 9.2099e-04
Validation mae = 0.513960
Epoch 3
Loss = 8.4629e-02, PNorm = 90.4164, GNorm = 0.8926, lr_0 = 9.2036e-04
Loss = 7.9172e-02, PNorm = 90.5050, GNorm = 0.7374, lr_0 = 9.1973e-04
Loss = 7.2527e-02, PNorm = 90.5792, GNorm = 0.4808, lr_0 = 9.1910e-04
Loss = 7.4239e-02, PNorm = 90.6532, GNorm = 0.7977, lr_0 = 9.1847e-04
Loss = 7.3151e-02, PNorm = 90.7342, GNorm = 0.3865, lr_0 = 9.1784e-04
Loss = 6.9098e-02, PNorm = 90.7932, GNorm = 0.4087, lr_0 = 9.1721e-04
Loss = 6.7125e-02, PNorm = 90.8631, GNorm = 0.8005, lr_0 = 9.1658e-04
Loss = 7.2705e-02, PNorm = 90.9281, GNorm = 0.4518, lr_0 = 9.1596e-04
Loss = 5.9897e-02, PNorm = 91.0006, GNorm = 0.4897, lr_0 = 9.1533e-04
Loss = 6.7031e-02, PNorm = 91.0598, GNorm = 0.7879, lr_0 = 9.1470e-04
Loss = 6.3194e-02, PNorm = 91.1186, GNorm = 0.9747, lr_0 = 9.1408e-04
Loss = 7.2808e-02, PNorm = 91.1829, GNorm = 0.4204, lr_0 = 9.1345e-04
Loss = 5.8226e-02, PNorm = 91.2459, GNorm = 0.4160, lr_0 = 9.1282e-04
Loss = 6.6124e-02, PNorm = 91.3139, GNorm = 0.3694, lr_0 = 9.1220e-04
Loss = 5.8040e-02, PNorm = 91.3807, GNorm = 0.5051, lr_0 = 9.1157e-04
Loss = 5.9475e-02, PNorm = 91.4429, GNorm = 0.4764, lr_0 = 9.1095e-04
Loss = 5.7698e-02, PNorm = 91.4902, GNorm = 0.5339, lr_0 = 9.1032e-04
Loss = 7.5732e-02, PNorm = 91.5523, GNorm = 0.3415, lr_0 = 9.0970e-04
Loss = 6.5798e-02, PNorm = 91.6282, GNorm = 0.6685, lr_0 = 9.0908e-04
Loss = 6.4734e-02, PNorm = 91.6987, GNorm = 1.0158, lr_0 = 9.0846e-04
Loss = 7.2772e-02, PNorm = 91.7697, GNorm = 0.4225, lr_0 = 9.0783e-04
Loss = 6.2534e-02, PNorm = 91.8321, GNorm = 0.2692, lr_0 = 9.0721e-04
Loss = 5.8109e-02, PNorm = 91.8883, GNorm = 0.3669, lr_0 = 9.0659e-04
Loss = 6.5728e-02, PNorm = 91.9574, GNorm = 0.5948, lr_0 = 9.0597e-04
Loss = 6.0598e-02, PNorm = 92.0226, GNorm = 0.3462, lr_0 = 9.0535e-04
Loss = 6.0581e-02, PNorm = 92.0809, GNorm = 0.3947, lr_0 = 9.0473e-04
Loss = 6.3378e-02, PNorm = 92.1534, GNorm = 0.4271, lr_0 = 9.0411e-04
Loss = 7.0496e-02, PNorm = 92.2158, GNorm = 0.5172, lr_0 = 9.0349e-04
Loss = 6.7463e-02, PNorm = 92.2845, GNorm = 0.8799, lr_0 = 9.0287e-04
Loss = 6.6513e-02, PNorm = 92.3470, GNorm = 0.4546, lr_0 = 9.0225e-04
Loss = 5.7995e-02, PNorm = 92.4137, GNorm = 0.5343, lr_0 = 9.0163e-04
Loss = 5.4437e-02, PNorm = 92.4749, GNorm = 0.2905, lr_0 = 9.0102e-04
Loss = 6.7001e-02, PNorm = 92.5423, GNorm = 0.6461, lr_0 = 9.0040e-04
Loss = 6.8232e-02, PNorm = 92.6099, GNorm = 0.3737, lr_0 = 8.9978e-04
Loss = 7.1543e-02, PNorm = 92.6827, GNorm = 0.7106, lr_0 = 8.9916e-04
Loss = 6.4840e-02, PNorm = 92.7620, GNorm = 0.9400, lr_0 = 8.9855e-04
Loss = 6.8600e-02, PNorm = 92.8395, GNorm = 0.7766, lr_0 = 8.9793e-04
Loss = 6.2973e-02, PNorm = 92.9109, GNorm = 0.3380, lr_0 = 8.9732e-04
Loss = 6.6094e-02, PNorm = 92.9879, GNorm = 0.4907, lr_0 = 8.9670e-04
Loss = 7.0658e-02, PNorm = 93.0658, GNorm = 0.3966, lr_0 = 8.9609e-04
Loss = 6.2802e-02, PNorm = 93.1392, GNorm = 0.4324, lr_0 = 8.9548e-04
Loss = 5.9911e-02, PNorm = 93.2029, GNorm = 0.5443, lr_0 = 8.9486e-04
Loss = 6.8894e-02, PNorm = 93.2753, GNorm = 0.5006, lr_0 = 8.9425e-04
Loss = 5.7837e-02, PNorm = 93.3513, GNorm = 0.4816, lr_0 = 8.9364e-04
Loss = 7.1338e-02, PNorm = 93.4031, GNorm = 0.4207, lr_0 = 8.9302e-04
Loss = 6.7402e-02, PNorm = 93.4653, GNorm = 0.4189, lr_0 = 8.9241e-04
Loss = 6.5856e-02, PNorm = 93.5319, GNorm = 0.6676, lr_0 = 8.9180e-04
Loss = 6.2056e-02, PNorm = 93.5961, GNorm = 0.4337, lr_0 = 8.9119e-04
Loss = 7.1611e-02, PNorm = 93.6725, GNorm = 0.3469, lr_0 = 8.9058e-04
Loss = 6.5308e-02, PNorm = 93.7401, GNorm = 0.3229, lr_0 = 8.8997e-04
Loss = 6.5268e-02, PNorm = 93.8099, GNorm = 0.9508, lr_0 = 8.8936e-04
Loss = 6.7866e-02, PNorm = 93.8756, GNorm = 0.5202, lr_0 = 8.8875e-04
Loss = 7.2661e-02, PNorm = 93.9614, GNorm = 0.5619, lr_0 = 8.8814e-04
Loss = 6.5285e-02, PNorm = 94.0366, GNorm = 0.3456, lr_0 = 8.8753e-04
Loss = 8.1044e-02, PNorm = 94.1122, GNorm = 0.6322, lr_0 = 8.8693e-04
Loss = 7.3441e-02, PNorm = 94.1861, GNorm = 0.6908, lr_0 = 8.8632e-04
Loss = 7.2433e-02, PNorm = 94.2701, GNorm = 0.6543, lr_0 = 8.8571e-04
Loss = 6.6129e-02, PNorm = 94.3509, GNorm = 0.4634, lr_0 = 8.8510e-04
Loss = 6.5424e-02, PNorm = 94.4228, GNorm = 0.5636, lr_0 = 8.8450e-04
Loss = 7.4024e-02, PNorm = 94.5085, GNorm = 0.4135, lr_0 = 8.8389e-04
Loss = 6.9261e-02, PNorm = 94.5863, GNorm = 0.8082, lr_0 = 8.8329e-04
Loss = 7.4300e-02, PNorm = 94.6632, GNorm = 0.5902, lr_0 = 8.8268e-04
Loss = 7.6682e-02, PNorm = 94.7379, GNorm = 0.5717, lr_0 = 8.8208e-04
Loss = 7.2650e-02, PNorm = 94.8085, GNorm = 0.9139, lr_0 = 8.8147e-04
Loss = 7.3586e-02, PNorm = 94.8850, GNorm = 0.3671, lr_0 = 8.8087e-04
Loss = 6.6742e-02, PNorm = 94.9705, GNorm = 0.6016, lr_0 = 8.8026e-04
Loss = 7.5381e-02, PNorm = 95.0542, GNorm = 0.9368, lr_0 = 8.7966e-04
Loss = 6.8609e-02, PNorm = 95.1416, GNorm = 0.8130, lr_0 = 8.7906e-04
Loss = 6.9742e-02, PNorm = 95.2292, GNorm = 1.0386, lr_0 = 8.7846e-04
Loss = 7.6114e-02, PNorm = 95.3086, GNorm = 0.8603, lr_0 = 8.7785e-04
Loss = 7.4371e-02, PNorm = 95.4029, GNorm = 0.4713, lr_0 = 8.7725e-04
Loss = 7.0374e-02, PNorm = 95.4821, GNorm = 0.6854, lr_0 = 8.7665e-04
Loss = 7.0159e-02, PNorm = 95.5540, GNorm = 0.4019, lr_0 = 8.7605e-04
Loss = 7.0837e-02, PNorm = 95.6356, GNorm = 0.5749, lr_0 = 8.7545e-04
Loss = 9.0572e-02, PNorm = 95.7354, GNorm = 0.6968, lr_0 = 8.7485e-04
Loss = 6.3456e-02, PNorm = 95.8254, GNorm = 0.3984, lr_0 = 8.7425e-04
Loss = 7.8158e-02, PNorm = 95.9163, GNorm = 0.4286, lr_0 = 8.7365e-04
Loss = 7.2373e-02, PNorm = 95.9930, GNorm = 0.6254, lr_0 = 8.7306e-04
Loss = 7.4179e-02, PNorm = 96.0779, GNorm = 0.8332, lr_0 = 8.7246e-04
Loss = 8.1593e-02, PNorm = 96.1604, GNorm = 0.5129, lr_0 = 8.7186e-04
Loss = 7.5756e-02, PNorm = 96.2494, GNorm = 0.4365, lr_0 = 8.7126e-04
Loss = 7.3436e-02, PNorm = 96.3365, GNorm = 0.3465, lr_0 = 8.7067e-04
Loss = 7.7176e-02, PNorm = 96.4347, GNorm = 0.5277, lr_0 = 8.7007e-04
Loss = 7.9314e-02, PNorm = 96.5271, GNorm = 0.6815, lr_0 = 8.6947e-04
Loss = 8.6696e-02, PNorm = 96.6158, GNorm = 0.5438, lr_0 = 8.6888e-04
Loss = 7.5222e-02, PNorm = 96.7129, GNorm = 0.3108, lr_0 = 8.6828e-04
Loss = 8.2696e-02, PNorm = 96.7983, GNorm = 0.6579, lr_0 = 8.6769e-04
Loss = 6.4876e-02, PNorm = 96.8936, GNorm = 0.8258, lr_0 = 8.6709e-04
Loss = 8.0516e-02, PNorm = 96.9699, GNorm = 0.5538, lr_0 = 8.6650e-04
Loss = 6.2688e-02, PNorm = 97.0579, GNorm = 0.6223, lr_0 = 8.6590e-04
Loss = 7.4231e-02, PNorm = 97.1325, GNorm = 0.7626, lr_0 = 8.6531e-04
Loss = 7.1606e-02, PNorm = 97.2179, GNorm = 1.1680, lr_0 = 8.6472e-04
Loss = 7.9789e-02, PNorm = 97.2992, GNorm = 0.3461, lr_0 = 8.6413e-04
Loss = 6.6215e-02, PNorm = 97.3913, GNorm = 0.6788, lr_0 = 8.6353e-04
Loss = 7.4122e-02, PNorm = 97.4731, GNorm = 0.3625, lr_0 = 8.6294e-04
Loss = 7.5962e-02, PNorm = 97.5628, GNorm = 0.4458, lr_0 = 8.6235e-04
Loss = 7.5631e-02, PNorm = 97.6428, GNorm = 0.5133, lr_0 = 8.6176e-04
Loss = 7.1200e-02, PNorm = 97.7292, GNorm = 0.4569, lr_0 = 8.6117e-04
Loss = 6.9181e-02, PNorm = 97.8029, GNorm = 0.4523, lr_0 = 8.6058e-04
Loss = 7.2914e-02, PNorm = 97.8851, GNorm = 0.3973, lr_0 = 8.5999e-04
Loss = 7.6777e-02, PNorm = 97.9637, GNorm = 0.7872, lr_0 = 8.5940e-04
Loss = 8.1533e-02, PNorm = 98.0593, GNorm = 1.0160, lr_0 = 8.5881e-04
Loss = 8.0087e-02, PNorm = 98.1610, GNorm = 0.3264, lr_0 = 8.5823e-04
Loss = 6.3515e-02, PNorm = 98.2525, GNorm = 0.5173, lr_0 = 8.5764e-04
Loss = 8.2440e-02, PNorm = 98.3370, GNorm = 0.4979, lr_0 = 8.5705e-04
Loss = 6.6440e-02, PNorm = 98.4308, GNorm = 0.4315, lr_0 = 8.5646e-04
Loss = 8.3635e-02, PNorm = 98.5256, GNorm = 0.3293, lr_0 = 8.5588e-04
Loss = 7.1185e-02, PNorm = 98.6268, GNorm = 0.4782, lr_0 = 8.5529e-04
Loss = 7.8627e-02, PNorm = 98.7196, GNorm = 0.5720, lr_0 = 8.5470e-04
Loss = 8.3234e-02, PNorm = 98.8177, GNorm = 0.5319, lr_0 = 8.5412e-04
Loss = 7.3042e-02, PNorm = 98.9160, GNorm = 0.4641, lr_0 = 8.5353e-04
Loss = 7.8407e-02, PNorm = 99.0183, GNorm = 0.6028, lr_0 = 8.5295e-04
Loss = 7.6142e-02, PNorm = 99.1044, GNorm = 0.6085, lr_0 = 8.5236e-04
Loss = 7.7248e-02, PNorm = 99.2006, GNorm = 0.3971, lr_0 = 8.5178e-04
Loss = 8.1284e-02, PNorm = 99.2820, GNorm = 0.8748, lr_0 = 8.5120e-04
Loss = 8.4466e-02, PNorm = 99.3800, GNorm = 0.3962, lr_0 = 8.5061e-04
Loss = 7.6085e-02, PNorm = 99.4830, GNorm = 0.3497, lr_0 = 8.5003e-04
Loss = 7.7457e-02, PNorm = 99.5755, GNorm = 0.5246, lr_0 = 8.4945e-04
Loss = 7.4301e-02, PNorm = 99.6698, GNorm = 0.6574, lr_0 = 8.4887e-04
Loss = 7.3759e-02, PNorm = 99.7695, GNorm = 0.9002, lr_0 = 8.4828e-04
Validation mae = 0.501606
Epoch 4
Loss = 5.0713e-02, PNorm = 99.8620, GNorm = 0.7414, lr_0 = 8.4770e-04
Loss = 5.5023e-02, PNorm = 99.9424, GNorm = 0.4108, lr_0 = 8.4712e-04
Loss = 5.0176e-02, PNorm = 99.9973, GNorm = 0.5176, lr_0 = 8.4654e-04
Loss = 4.3252e-02, PNorm = 100.0503, GNorm = 0.4534, lr_0 = 8.4596e-04
Loss = 5.3110e-02, PNorm = 100.1057, GNorm = 0.8222, lr_0 = 8.4538e-04
Loss = 4.5623e-02, PNorm = 100.1603, GNorm = 0.3836, lr_0 = 8.4480e-04
Loss = 4.8796e-02, PNorm = 100.2134, GNorm = 0.4106, lr_0 = 8.4423e-04
Loss = 4.4132e-02, PNorm = 100.2700, GNorm = 0.5303, lr_0 = 8.4365e-04
Loss = 4.8918e-02, PNorm = 100.3304, GNorm = 0.3986, lr_0 = 8.4307e-04
Loss = 4.5076e-02, PNorm = 100.3897, GNorm = 0.2954, lr_0 = 8.4249e-04
Loss = 5.1415e-02, PNorm = 100.4402, GNorm = 0.3190, lr_0 = 8.4191e-04
Loss = 4.7739e-02, PNorm = 100.4980, GNorm = 0.4052, lr_0 = 8.4134e-04
Loss = 4.5648e-02, PNorm = 100.5532, GNorm = 0.4739, lr_0 = 8.4076e-04
Loss = 4.4405e-02, PNorm = 100.6026, GNorm = 0.6080, lr_0 = 8.4019e-04
Loss = 4.6272e-02, PNorm = 100.6566, GNorm = 0.5286, lr_0 = 8.3961e-04
Loss = 4.2945e-02, PNorm = 100.7176, GNorm = 0.3781, lr_0 = 8.3903e-04
Loss = 4.7084e-02, PNorm = 100.7760, GNorm = 0.3150, lr_0 = 8.3846e-04
Loss = 3.9910e-02, PNorm = 100.8257, GNorm = 0.4089, lr_0 = 8.3789e-04
Loss = 4.4827e-02, PNorm = 100.8823, GNorm = 0.2876, lr_0 = 8.3731e-04
Loss = 4.7724e-02, PNorm = 100.9422, GNorm = 0.6612, lr_0 = 8.3674e-04
Loss = 4.5362e-02, PNorm = 101.0040, GNorm = 0.3095, lr_0 = 8.3616e-04
Loss = 4.3439e-02, PNorm = 101.0660, GNorm = 0.3915, lr_0 = 8.3559e-04
Loss = 4.9471e-02, PNorm = 101.1261, GNorm = 0.4091, lr_0 = 8.3502e-04
Loss = 4.4513e-02, PNorm = 101.1897, GNorm = 0.2966, lr_0 = 8.3445e-04
Loss = 4.4947e-02, PNorm = 101.2522, GNorm = 0.3803, lr_0 = 8.3388e-04
Loss = 4.3882e-02, PNorm = 101.3082, GNorm = 0.3242, lr_0 = 8.3330e-04
Loss = 4.8439e-02, PNorm = 101.3607, GNorm = 0.2554, lr_0 = 8.3273e-04
Loss = 4.8633e-02, PNorm = 101.4211, GNorm = 0.7634, lr_0 = 8.3216e-04
Loss = 4.7382e-02, PNorm = 101.4788, GNorm = 0.4585, lr_0 = 8.3159e-04
Loss = 4.6199e-02, PNorm = 101.5338, GNorm = 0.4773, lr_0 = 8.3102e-04
Loss = 4.9152e-02, PNorm = 101.5912, GNorm = 1.0231, lr_0 = 8.3045e-04
Loss = 4.5420e-02, PNorm = 101.6531, GNorm = 0.3233, lr_0 = 8.2988e-04
Loss = 4.3496e-02, PNorm = 101.7155, GNorm = 0.4808, lr_0 = 8.2932e-04
Loss = 4.9877e-02, PNorm = 101.7724, GNorm = 0.6357, lr_0 = 8.2875e-04
Loss = 4.4501e-02, PNorm = 101.8364, GNorm = 0.2157, lr_0 = 8.2818e-04
Loss = 4.6189e-02, PNorm = 101.8951, GNorm = 0.9935, lr_0 = 8.2761e-04
Loss = 3.9526e-02, PNorm = 101.9559, GNorm = 0.4939, lr_0 = 8.2705e-04
Loss = 5.7538e-02, PNorm = 102.0145, GNorm = 0.4636, lr_0 = 8.2648e-04
Loss = 4.7955e-02, PNorm = 102.0831, GNorm = 0.7519, lr_0 = 8.2591e-04
Loss = 4.5125e-02, PNorm = 102.1559, GNorm = 0.2820, lr_0 = 8.2535e-04
Loss = 5.2010e-02, PNorm = 102.2223, GNorm = 0.5058, lr_0 = 8.2478e-04
Loss = 5.1170e-02, PNorm = 102.2889, GNorm = 0.5980, lr_0 = 8.2422e-04
Loss = 3.8609e-02, PNorm = 102.3643, GNorm = 0.5856, lr_0 = 8.2365e-04
Loss = 4.9959e-02, PNorm = 102.4226, GNorm = 0.5932, lr_0 = 8.2309e-04
Loss = 4.9304e-02, PNorm = 102.4905, GNorm = 0.5184, lr_0 = 8.2252e-04
Loss = 4.4806e-02, PNorm = 102.5468, GNorm = 0.8038, lr_0 = 8.2196e-04
Loss = 4.3698e-02, PNorm = 102.6126, GNorm = 0.4406, lr_0 = 8.2140e-04
Loss = 4.5981e-02, PNorm = 102.6668, GNorm = 0.5340, lr_0 = 8.2084e-04
Loss = 4.7997e-02, PNorm = 102.7345, GNorm = 0.3656, lr_0 = 8.2027e-04
Loss = 4.1321e-02, PNorm = 102.7877, GNorm = 0.5210, lr_0 = 8.1971e-04
Loss = 3.8609e-02, PNorm = 102.8467, GNorm = 0.3215, lr_0 = 8.1915e-04
Loss = 4.7260e-02, PNorm = 102.8945, GNorm = 0.4803, lr_0 = 8.1859e-04
Loss = 4.1545e-02, PNorm = 102.9571, GNorm = 0.5155, lr_0 = 8.1803e-04
Loss = 5.0832e-02, PNorm = 103.0220, GNorm = 0.5503, lr_0 = 8.1747e-04
Loss = 4.3084e-02, PNorm = 103.0869, GNorm = 0.3139, lr_0 = 8.1691e-04
Loss = 4.5972e-02, PNorm = 103.1530, GNorm = 0.4814, lr_0 = 8.1635e-04
Loss = 4.0663e-02, PNorm = 103.2027, GNorm = 0.6818, lr_0 = 8.1579e-04
Loss = 4.5399e-02, PNorm = 103.2674, GNorm = 1.0172, lr_0 = 8.1523e-04
Loss = 4.8638e-02, PNorm = 103.3315, GNorm = 0.6188, lr_0 = 8.1467e-04
Loss = 4.2134e-02, PNorm = 103.3998, GNorm = 0.5954, lr_0 = 8.1411e-04
Loss = 4.0872e-02, PNorm = 103.4604, GNorm = 0.4942, lr_0 = 8.1355e-04
Loss = 4.4140e-02, PNorm = 103.5235, GNorm = 0.3182, lr_0 = 8.1300e-04
Loss = 5.4077e-02, PNorm = 103.5910, GNorm = 0.3605, lr_0 = 8.1244e-04
Loss = 4.7447e-02, PNorm = 103.6551, GNorm = 0.4363, lr_0 = 8.1188e-04
Loss = 6.0271e-02, PNorm = 103.7229, GNorm = 0.8577, lr_0 = 8.1133e-04
Loss = 4.3847e-02, PNorm = 103.7831, GNorm = 0.7220, lr_0 = 8.1077e-04
Loss = 4.8829e-02, PNorm = 103.8534, GNorm = 0.8765, lr_0 = 8.1022e-04
Loss = 4.4761e-02, PNorm = 103.9219, GNorm = 0.3359, lr_0 = 8.0966e-04
Loss = 5.3628e-02, PNorm = 103.9844, GNorm = 0.3387, lr_0 = 8.0911e-04
Loss = 4.7457e-02, PNorm = 104.0478, GNorm = 0.4347, lr_0 = 8.0855e-04
Loss = 4.5351e-02, PNorm = 104.1254, GNorm = 0.3929, lr_0 = 8.0800e-04
Loss = 4.8237e-02, PNorm = 104.1930, GNorm = 0.3269, lr_0 = 8.0745e-04
Loss = 5.8565e-02, PNorm = 104.2756, GNorm = 0.2512, lr_0 = 8.0689e-04
Loss = 4.5597e-02, PNorm = 104.3519, GNorm = 0.3756, lr_0 = 8.0634e-04
Loss = 4.6671e-02, PNorm = 104.4248, GNorm = 0.3654, lr_0 = 8.0579e-04
Loss = 4.2256e-02, PNorm = 104.4852, GNorm = 0.4460, lr_0 = 8.0523e-04
Loss = 6.0603e-02, PNorm = 104.5557, GNorm = 0.4355, lr_0 = 8.0468e-04
Loss = 4.3936e-02, PNorm = 104.6348, GNorm = 0.4929, lr_0 = 8.0413e-04
Loss = 6.0007e-02, PNorm = 104.7041, GNorm = 0.4644, lr_0 = 8.0358e-04
Loss = 4.7929e-02, PNorm = 104.7867, GNorm = 0.3808, lr_0 = 8.0303e-04
Loss = 4.9608e-02, PNorm = 104.8560, GNorm = 1.0280, lr_0 = 8.0248e-04
Loss = 5.1330e-02, PNorm = 104.9373, GNorm = 0.4037, lr_0 = 8.0193e-04
Loss = 5.0736e-02, PNorm = 105.0066, GNorm = 0.8294, lr_0 = 8.0138e-04
Loss = 4.9772e-02, PNorm = 105.0872, GNorm = 0.4575, lr_0 = 8.0083e-04
Loss = 5.3738e-02, PNorm = 105.1584, GNorm = 0.6366, lr_0 = 8.0028e-04
Loss = 4.6005e-02, PNorm = 105.2396, GNorm = 0.5015, lr_0 = 7.9974e-04
Loss = 4.6602e-02, PNorm = 105.3131, GNorm = 0.3632, lr_0 = 7.9919e-04
Loss = 5.2067e-02, PNorm = 105.3899, GNorm = 0.4983, lr_0 = 7.9864e-04
Loss = 4.8289e-02, PNorm = 105.4651, GNorm = 0.3494, lr_0 = 7.9809e-04
Loss = 4.6093e-02, PNorm = 105.5445, GNorm = 0.6494, lr_0 = 7.9755e-04
Loss = 5.6477e-02, PNorm = 105.6167, GNorm = 0.4917, lr_0 = 7.9700e-04
Loss = 4.9613e-02, PNorm = 105.6879, GNorm = 0.5142, lr_0 = 7.9645e-04
Loss = 5.0130e-02, PNorm = 105.7609, GNorm = 0.4541, lr_0 = 7.9591e-04
Loss = 5.8424e-02, PNorm = 105.8344, GNorm = 0.4446, lr_0 = 7.9536e-04
Loss = 5.2376e-02, PNorm = 105.9096, GNorm = 0.7553, lr_0 = 7.9482e-04
Loss = 5.9392e-02, PNorm = 105.9876, GNorm = 0.3665, lr_0 = 7.9427e-04
Loss = 5.1023e-02, PNorm = 106.0741, GNorm = 0.5741, lr_0 = 7.9373e-04
Loss = 5.5109e-02, PNorm = 106.1565, GNorm = 0.5186, lr_0 = 7.9319e-04
Loss = 4.2325e-02, PNorm = 106.2273, GNorm = 0.2966, lr_0 = 7.9264e-04
Loss = 5.3601e-02, PNorm = 106.2907, GNorm = 0.7217, lr_0 = 7.9210e-04
Loss = 5.0903e-02, PNorm = 106.3640, GNorm = 0.4593, lr_0 = 7.9156e-04
Loss = 4.6460e-02, PNorm = 106.4369, GNorm = 0.2638, lr_0 = 7.9101e-04
Loss = 5.1993e-02, PNorm = 106.5132, GNorm = 0.5281, lr_0 = 7.9047e-04
Loss = 4.5893e-02, PNorm = 106.5911, GNorm = 0.7163, lr_0 = 7.8993e-04
Loss = 5.0660e-02, PNorm = 106.6638, GNorm = 0.5337, lr_0 = 7.8939e-04
Loss = 5.8438e-02, PNorm = 106.7437, GNorm = 0.3090, lr_0 = 7.8885e-04
Loss = 5.7143e-02, PNorm = 106.8139, GNorm = 0.4069, lr_0 = 7.8831e-04
Loss = 5.3971e-02, PNorm = 106.8886, GNorm = 0.3358, lr_0 = 7.8777e-04
Loss = 5.7930e-02, PNorm = 106.9735, GNorm = 0.8224, lr_0 = 7.8723e-04
Loss = 4.8271e-02, PNorm = 107.0543, GNorm = 0.5903, lr_0 = 7.8669e-04
Loss = 5.2026e-02, PNorm = 107.1342, GNorm = 0.2790, lr_0 = 7.8615e-04
Loss = 5.7178e-02, PNorm = 107.2161, GNorm = 0.7217, lr_0 = 7.8561e-04
Loss = 5.2072e-02, PNorm = 107.2967, GNorm = 0.4520, lr_0 = 7.8507e-04
Loss = 5.0464e-02, PNorm = 107.3786, GNorm = 0.6238, lr_0 = 7.8454e-04
Loss = 4.5750e-02, PNorm = 107.4563, GNorm = 0.4721, lr_0 = 7.8400e-04
Loss = 5.4193e-02, PNorm = 107.5321, GNorm = 0.6453, lr_0 = 7.8346e-04
Loss = 5.4520e-02, PNorm = 107.5988, GNorm = 0.3853, lr_0 = 7.8293e-04
Loss = 5.4898e-02, PNorm = 107.6773, GNorm = 0.5816, lr_0 = 7.8239e-04
Loss = 6.2011e-02, PNorm = 107.7529, GNorm = 0.8400, lr_0 = 7.8185e-04
Loss = 4.9371e-02, PNorm = 107.8325, GNorm = 0.4798, lr_0 = 7.8132e-04
Validation mae = 0.494941
Epoch 5
Loss = 4.4501e-02, PNorm = 107.8921, GNorm = 0.5602, lr_0 = 7.8078e-04
Loss = 3.8580e-02, PNorm = 107.9468, GNorm = 0.3744, lr_0 = 7.8025e-04
Loss = 3.5861e-02, PNorm = 108.0007, GNorm = 0.2124, lr_0 = 7.7971e-04
Loss = 3.2417e-02, PNorm = 108.0449, GNorm = 0.2276, lr_0 = 7.7918e-04
Loss = 4.1532e-02, PNorm = 108.0957, GNorm = 0.5004, lr_0 = 7.7864e-04
Loss = 3.5067e-02, PNorm = 108.1484, GNorm = 0.3799, lr_0 = 7.7811e-04
Loss = 3.5596e-02, PNorm = 108.1961, GNorm = 0.1915, lr_0 = 7.7758e-04
Loss = 3.0521e-02, PNorm = 108.2466, GNorm = 0.2312, lr_0 = 7.7705e-04
Loss = 3.7556e-02, PNorm = 108.2887, GNorm = 0.2894, lr_0 = 7.7651e-04
Loss = 2.9865e-02, PNorm = 108.3394, GNorm = 0.2995, lr_0 = 7.7598e-04
Loss = 4.2387e-02, PNorm = 108.3832, GNorm = 0.2871, lr_0 = 7.7545e-04
Loss = 3.9675e-02, PNorm = 108.4402, GNorm = 0.2760, lr_0 = 7.7492e-04
Loss = 3.5268e-02, PNorm = 108.4893, GNorm = 0.5254, lr_0 = 7.7439e-04
Loss = 3.7729e-02, PNorm = 108.5411, GNorm = 0.4773, lr_0 = 7.7386e-04
Loss = 3.4103e-02, PNorm = 108.5935, GNorm = 0.3145, lr_0 = 7.7333e-04
Loss = 4.2071e-02, PNorm = 108.6422, GNorm = 1.1014, lr_0 = 7.7280e-04
Loss = 3.9451e-02, PNorm = 108.6923, GNorm = 0.6222, lr_0 = 7.7227e-04
Loss = 3.1457e-02, PNorm = 108.7562, GNorm = 0.3599, lr_0 = 7.7174e-04
Loss = 3.3793e-02, PNorm = 108.8124, GNorm = 0.2337, lr_0 = 7.7121e-04
Loss = 3.4587e-02, PNorm = 108.8691, GNorm = 0.2798, lr_0 = 7.7068e-04
Loss = 3.1673e-02, PNorm = 108.9187, GNorm = 0.3650, lr_0 = 7.7015e-04
Loss = 3.1014e-02, PNorm = 108.9633, GNorm = 0.2212, lr_0 = 7.6963e-04
Loss = 3.2440e-02, PNorm = 109.0113, GNorm = 0.2037, lr_0 = 7.6910e-04
Loss = 3.5149e-02, PNorm = 109.0588, GNorm = 0.3877, lr_0 = 7.6857e-04
Loss = 3.3450e-02, PNorm = 109.1119, GNorm = 0.3367, lr_0 = 7.6805e-04
Loss = 3.3904e-02, PNorm = 109.1594, GNorm = 0.4807, lr_0 = 7.6752e-04
Loss = 3.4497e-02, PNorm = 109.2054, GNorm = 0.2883, lr_0 = 7.6699e-04
Loss = 3.5719e-02, PNorm = 109.2608, GNorm = 0.5203, lr_0 = 7.6647e-04
Loss = 4.1277e-02, PNorm = 109.3066, GNorm = 0.5138, lr_0 = 7.6594e-04
Loss = 3.1995e-02, PNorm = 109.3629, GNorm = 0.5117, lr_0 = 7.6542e-04
Loss = 2.9095e-02, PNorm = 109.4131, GNorm = 0.2132, lr_0 = 7.6489e-04
Loss = 3.8316e-02, PNorm = 109.4582, GNorm = 0.2910, lr_0 = 7.6437e-04
Loss = 3.5117e-02, PNorm = 109.5122, GNorm = 0.4264, lr_0 = 7.6385e-04
Loss = 3.3961e-02, PNorm = 109.5656, GNorm = 0.3164, lr_0 = 7.6332e-04
Loss = 3.6333e-02, PNorm = 109.6219, GNorm = 0.2554, lr_0 = 7.6280e-04
Loss = 3.8214e-02, PNorm = 109.6798, GNorm = 0.3941, lr_0 = 7.6228e-04
Loss = 3.2894e-02, PNorm = 109.7400, GNorm = 0.3486, lr_0 = 7.6176e-04
Loss = 3.6370e-02, PNorm = 109.7927, GNorm = 0.2916, lr_0 = 7.6123e-04
Loss = 3.4549e-02, PNorm = 109.8500, GNorm = 0.2921, lr_0 = 7.6071e-04
Loss = 3.4912e-02, PNorm = 109.9073, GNorm = 0.3537, lr_0 = 7.6019e-04
Loss = 3.7731e-02, PNorm = 109.9589, GNorm = 0.2942, lr_0 = 7.5967e-04
Loss = 3.6304e-02, PNorm = 110.0224, GNorm = 0.6342, lr_0 = 7.5915e-04
Loss = 3.1523e-02, PNorm = 110.0841, GNorm = 0.3425, lr_0 = 7.5863e-04
Loss = 3.0999e-02, PNorm = 110.1370, GNorm = 0.5401, lr_0 = 7.5811e-04
Loss = 4.0022e-02, PNorm = 110.1946, GNorm = 0.4378, lr_0 = 7.5759e-04
Loss = 3.9282e-02, PNorm = 110.2477, GNorm = 0.5086, lr_0 = 7.5707e-04
Loss = 3.1556e-02, PNorm = 110.3094, GNorm = 0.2830, lr_0 = 7.5655e-04
Loss = 3.4848e-02, PNorm = 110.3596, GNorm = 0.6046, lr_0 = 7.5603e-04
Loss = 3.3484e-02, PNorm = 110.4237, GNorm = 0.4347, lr_0 = 7.5552e-04
Loss = 3.3989e-02, PNorm = 110.4757, GNorm = 0.2946, lr_0 = 7.5500e-04
Loss = 3.6617e-02, PNorm = 110.5403, GNorm = 0.5550, lr_0 = 7.5448e-04
Loss = 3.2157e-02, PNorm = 110.5967, GNorm = 0.4972, lr_0 = 7.5397e-04
Loss = 3.2987e-02, PNorm = 110.6485, GNorm = 0.2706, lr_0 = 7.5345e-04
Loss = 3.8950e-02, PNorm = 110.7009, GNorm = 0.4174, lr_0 = 7.5293e-04
Loss = 3.8003e-02, PNorm = 110.7585, GNorm = 0.6268, lr_0 = 7.5242e-04
Loss = 3.7292e-02, PNorm = 110.8232, GNorm = 0.3267, lr_0 = 7.5190e-04
Loss = 3.6828e-02, PNorm = 110.8787, GNorm = 0.4652, lr_0 = 7.5139e-04
Loss = 3.4119e-02, PNorm = 110.9388, GNorm = 0.2794, lr_0 = 7.5087e-04
Loss = 3.7203e-02, PNorm = 110.9882, GNorm = 0.4604, lr_0 = 7.5036e-04
Loss = 3.8076e-02, PNorm = 111.0491, GNorm = 0.3086, lr_0 = 7.4984e-04
Loss = 3.5014e-02, PNorm = 111.1118, GNorm = 0.4661, lr_0 = 7.4933e-04
Loss = 3.7982e-02, PNorm = 111.1702, GNorm = 0.2726, lr_0 = 7.4882e-04
Loss = 3.9350e-02, PNorm = 111.2246, GNorm = 0.9910, lr_0 = 7.4830e-04
Loss = 4.1292e-02, PNorm = 111.2984, GNorm = 0.3043, lr_0 = 7.4779e-04
Loss = 3.7500e-02, PNorm = 111.3690, GNorm = 0.6050, lr_0 = 7.4728e-04
Loss = 3.4292e-02, PNorm = 111.4347, GNorm = 0.6267, lr_0 = 7.4677e-04
Loss = 3.5983e-02, PNorm = 111.4935, GNorm = 0.2637, lr_0 = 7.4625e-04
Loss = 3.7934e-02, PNorm = 111.5567, GNorm = 0.2867, lr_0 = 7.4574e-04
Loss = 3.8821e-02, PNorm = 111.6149, GNorm = 0.4569, lr_0 = 7.4523e-04
Loss = 3.8130e-02, PNorm = 111.6816, GNorm = 0.2197, lr_0 = 7.4472e-04
Loss = 3.9493e-02, PNorm = 111.7529, GNorm = 0.3546, lr_0 = 7.4421e-04
Loss = 3.2048e-02, PNorm = 111.8173, GNorm = 0.6131, lr_0 = 7.4370e-04
Loss = 3.3990e-02, PNorm = 111.8754, GNorm = 0.7593, lr_0 = 7.4319e-04
Loss = 3.5169e-02, PNorm = 111.9377, GNorm = 0.2093, lr_0 = 7.4268e-04
Loss = 3.6779e-02, PNorm = 111.9992, GNorm = 0.5354, lr_0 = 7.4217e-04
Loss = 3.4646e-02, PNorm = 112.0589, GNorm = 0.4796, lr_0 = 7.4167e-04
Loss = 3.4351e-02, PNorm = 112.1158, GNorm = 0.2942, lr_0 = 7.4116e-04
Loss = 3.5862e-02, PNorm = 112.1784, GNorm = 0.4085, lr_0 = 7.4065e-04
Loss = 3.7425e-02, PNorm = 112.2388, GNorm = 0.4142, lr_0 = 7.4014e-04
Loss = 3.9521e-02, PNorm = 112.3056, GNorm = 0.2824, lr_0 = 7.3964e-04
Loss = 3.4769e-02, PNorm = 112.3638, GNorm = 0.7709, lr_0 = 7.3913e-04
Loss = 3.7126e-02, PNorm = 112.4263, GNorm = 0.4034, lr_0 = 7.3862e-04
Loss = 4.2184e-02, PNorm = 112.4951, GNorm = 0.2870, lr_0 = 7.3812e-04
Loss = 3.9024e-02, PNorm = 112.5659, GNorm = 0.5102, lr_0 = 7.3761e-04
Loss = 4.0169e-02, PNorm = 112.6353, GNorm = 0.5091, lr_0 = 7.3711e-04
Loss = 3.4805e-02, PNorm = 112.7099, GNorm = 0.5062, lr_0 = 7.3660e-04
Loss = 4.0086e-02, PNorm = 112.7739, GNorm = 0.3261, lr_0 = 7.3610e-04
Loss = 3.4252e-02, PNorm = 112.8333, GNorm = 0.2859, lr_0 = 7.3559e-04
Loss = 3.9490e-02, PNorm = 112.8933, GNorm = 0.2780, lr_0 = 7.3509e-04
Loss = 3.3527e-02, PNorm = 112.9542, GNorm = 0.6414, lr_0 = 7.3458e-04
Loss = 3.3515e-02, PNorm = 113.0210, GNorm = 0.3235, lr_0 = 7.3408e-04
Loss = 3.9069e-02, PNorm = 113.0856, GNorm = 0.6257, lr_0 = 7.3358e-04
Loss = 3.6037e-02, PNorm = 113.1542, GNorm = 0.6320, lr_0 = 7.3308e-04
Loss = 4.5367e-02, PNorm = 113.2228, GNorm = 0.8674, lr_0 = 7.3257e-04
Loss = 3.7784e-02, PNorm = 113.2967, GNorm = 1.0351, lr_0 = 7.3207e-04
Loss = 4.0466e-02, PNorm = 113.3699, GNorm = 0.5227, lr_0 = 7.3157e-04
Loss = 3.7883e-02, PNorm = 113.4470, GNorm = 0.3052, lr_0 = 7.3107e-04
Loss = 3.7269e-02, PNorm = 113.5132, GNorm = 0.6908, lr_0 = 7.3057e-04
Loss = 3.2341e-02, PNorm = 113.5704, GNorm = 0.2786, lr_0 = 7.3007e-04
Loss = 3.7951e-02, PNorm = 113.6268, GNorm = 0.3051, lr_0 = 7.2957e-04
Loss = 4.0692e-02, PNorm = 113.6880, GNorm = 0.7688, lr_0 = 7.2907e-04
Loss = 4.2323e-02, PNorm = 113.7586, GNorm = 0.7688, lr_0 = 7.2857e-04
Loss = 3.7645e-02, PNorm = 113.8218, GNorm = 0.3392, lr_0 = 7.2807e-04
Loss = 3.7190e-02, PNorm = 113.8853, GNorm = 0.4070, lr_0 = 7.2757e-04
Loss = 3.6534e-02, PNorm = 113.9564, GNorm = 0.2559, lr_0 = 7.2707e-04
Loss = 4.8204e-02, PNorm = 114.0329, GNorm = 0.5107, lr_0 = 7.2657e-04
Loss = 3.2998e-02, PNorm = 114.1075, GNorm = 0.8527, lr_0 = 7.2608e-04
Loss = 3.8658e-02, PNorm = 114.1777, GNorm = 1.1179, lr_0 = 7.2558e-04
Loss = 4.3720e-02, PNorm = 114.2451, GNorm = 0.6610, lr_0 = 7.2508e-04
Loss = 3.7140e-02, PNorm = 114.3133, GNorm = 0.4315, lr_0 = 7.2458e-04
Loss = 3.9599e-02, PNorm = 114.3882, GNorm = 0.3689, lr_0 = 7.2409e-04
Loss = 3.7942e-02, PNorm = 114.4577, GNorm = 0.3560, lr_0 = 7.2359e-04
Loss = 3.8492e-02, PNorm = 114.5250, GNorm = 0.3092, lr_0 = 7.2310e-04
Loss = 4.2774e-02, PNorm = 114.5943, GNorm = 0.2390, lr_0 = 7.2260e-04
Loss = 4.7645e-02, PNorm = 114.6719, GNorm = 0.9755, lr_0 = 7.2211e-04
Loss = 4.7424e-02, PNorm = 114.7549, GNorm = 0.7398, lr_0 = 7.2161e-04
Loss = 3.6557e-02, PNorm = 114.8306, GNorm = 0.3962, lr_0 = 7.2112e-04
Loss = 3.6295e-02, PNorm = 114.9055, GNorm = 0.6306, lr_0 = 7.2062e-04
Loss = 4.1458e-02, PNorm = 114.9694, GNorm = 0.5517, lr_0 = 7.2013e-04
Loss = 4.5615e-02, PNorm = 115.0442, GNorm = 0.6040, lr_0 = 7.1964e-04
Validation mae = 0.491231
Epoch 6
Loss = 2.6790e-02, PNorm = 115.1081, GNorm = 0.3073, lr_0 = 7.1914e-04
Loss = 2.9033e-02, PNorm = 115.1553, GNorm = 0.2641, lr_0 = 7.1865e-04
Loss = 3.2995e-02, PNorm = 115.2027, GNorm = 0.5905, lr_0 = 7.1816e-04
Loss = 2.9577e-02, PNorm = 115.2551, GNorm = 0.2749, lr_0 = 7.1767e-04
Loss = 3.6076e-02, PNorm = 115.3079, GNorm = 0.2156, lr_0 = 7.1717e-04
Loss = 2.7067e-02, PNorm = 115.3555, GNorm = 0.3031, lr_0 = 7.1668e-04
Loss = 3.0762e-02, PNorm = 115.4004, GNorm = 0.3405, lr_0 = 7.1619e-04
Loss = 2.6820e-02, PNorm = 115.4464, GNorm = 0.1899, lr_0 = 7.1570e-04
Loss = 2.9732e-02, PNorm = 115.4970, GNorm = 0.5750, lr_0 = 7.1521e-04
Loss = 2.5102e-02, PNorm = 115.5361, GNorm = 0.3121, lr_0 = 7.1472e-04
Loss = 3.2370e-02, PNorm = 115.5779, GNorm = 0.2944, lr_0 = 7.1423e-04
Loss = 3.1275e-02, PNorm = 115.6228, GNorm = 0.2741, lr_0 = 7.1374e-04
Loss = 2.7766e-02, PNorm = 115.6783, GNorm = 0.4495, lr_0 = 7.1325e-04
Loss = 2.9925e-02, PNorm = 115.7293, GNorm = 0.7490, lr_0 = 7.1277e-04
Loss = 2.8352e-02, PNorm = 115.7777, GNorm = 0.5628, lr_0 = 7.1228e-04
Loss = 2.9923e-02, PNorm = 115.8194, GNorm = 0.3279, lr_0 = 7.1179e-04
Loss = 2.8598e-02, PNorm = 115.8684, GNorm = 0.3472, lr_0 = 7.1130e-04
Loss = 3.1277e-02, PNorm = 115.9206, GNorm = 0.3570, lr_0 = 7.1081e-04
Loss = 2.8327e-02, PNorm = 115.9634, GNorm = 0.2409, lr_0 = 7.1033e-04
Loss = 2.9010e-02, PNorm = 116.0067, GNorm = 0.7129, lr_0 = 7.0984e-04
Loss = 2.8914e-02, PNorm = 116.0576, GNorm = 0.4662, lr_0 = 7.0935e-04
Loss = 2.8094e-02, PNorm = 116.1083, GNorm = 0.3089, lr_0 = 7.0887e-04
Loss = 2.4945e-02, PNorm = 116.1531, GNorm = 0.3093, lr_0 = 7.0838e-04
Loss = 2.8314e-02, PNorm = 116.1951, GNorm = 0.2494, lr_0 = 7.0790e-04
Loss = 2.9222e-02, PNorm = 116.2350, GNorm = 0.3484, lr_0 = 7.0741e-04
Loss = 2.9552e-02, PNorm = 116.2880, GNorm = 0.4152, lr_0 = 7.0693e-04
Loss = 2.4797e-02, PNorm = 116.3411, GNorm = 0.2405, lr_0 = 7.0644e-04
Loss = 2.4377e-02, PNorm = 116.3862, GNorm = 0.4251, lr_0 = 7.0596e-04
Loss = 2.6195e-02, PNorm = 116.4289, GNorm = 0.2867, lr_0 = 7.0548e-04
Loss = 2.8363e-02, PNorm = 116.4798, GNorm = 0.6982, lr_0 = 7.0499e-04
Loss = 2.3612e-02, PNorm = 116.5298, GNorm = 0.2730, lr_0 = 7.0451e-04
Loss = 2.7597e-02, PNorm = 116.5766, GNorm = 0.3242, lr_0 = 7.0403e-04
Loss = 3.0638e-02, PNorm = 116.6297, GNorm = 1.0015, lr_0 = 7.0354e-04
Loss = 2.7383e-02, PNorm = 116.6754, GNorm = 0.3961, lr_0 = 7.0306e-04
Loss = 3.3464e-02, PNorm = 116.7239, GNorm = 0.1672, lr_0 = 7.0258e-04
Loss = 2.7126e-02, PNorm = 116.7756, GNorm = 0.6634, lr_0 = 7.0210e-04
Loss = 2.4030e-02, PNorm = 116.8200, GNorm = 0.3771, lr_0 = 7.0162e-04
Loss = 2.6457e-02, PNorm = 116.8605, GNorm = 0.5012, lr_0 = 7.0114e-04
Loss = 2.6791e-02, PNorm = 116.9061, GNorm = 0.6888, lr_0 = 7.0066e-04
Loss = 2.2393e-02, PNorm = 116.9544, GNorm = 0.3083, lr_0 = 7.0018e-04
Loss = 3.3087e-02, PNorm = 117.0043, GNorm = 0.3538, lr_0 = 6.9970e-04
Loss = 2.2644e-02, PNorm = 117.0504, GNorm = 0.4236, lr_0 = 6.9922e-04
Loss = 2.6105e-02, PNorm = 117.0936, GNorm = 0.7207, lr_0 = 6.9874e-04
Loss = 2.3910e-02, PNorm = 117.1355, GNorm = 0.3968, lr_0 = 6.9826e-04
Loss = 2.7424e-02, PNorm = 117.1820, GNorm = 0.6843, lr_0 = 6.9778e-04
Loss = 2.6241e-02, PNorm = 117.2285, GNorm = 0.2225, lr_0 = 6.9730e-04
Loss = 3.1206e-02, PNorm = 117.2692, GNorm = 0.3975, lr_0 = 6.9683e-04
Loss = 2.9559e-02, PNorm = 117.3247, GNorm = 0.3240, lr_0 = 6.9635e-04
Loss = 2.8111e-02, PNorm = 117.3806, GNorm = 0.6249, lr_0 = 6.9587e-04
Loss = 3.0032e-02, PNorm = 117.4416, GNorm = 0.3426, lr_0 = 6.9540e-04
Loss = 2.6446e-02, PNorm = 117.4961, GNorm = 0.2768, lr_0 = 6.9492e-04
Loss = 2.8815e-02, PNorm = 117.5389, GNorm = 0.5264, lr_0 = 6.9444e-04
Loss = 2.5359e-02, PNorm = 117.5861, GNorm = 0.6773, lr_0 = 6.9397e-04
Loss = 2.3986e-02, PNorm = 117.6334, GNorm = 0.3955, lr_0 = 6.9349e-04
Loss = 2.9666e-02, PNorm = 117.6828, GNorm = 0.3822, lr_0 = 6.9302e-04
Loss = 2.6730e-02, PNorm = 117.7385, GNorm = 0.5856, lr_0 = 6.9254e-04
Loss = 2.6302e-02, PNorm = 117.7875, GNorm = 0.2573, lr_0 = 6.9207e-04
Loss = 2.5429e-02, PNorm = 117.8346, GNorm = 0.3431, lr_0 = 6.9159e-04
Loss = 2.6676e-02, PNorm = 117.8809, GNorm = 0.2968, lr_0 = 6.9112e-04
Loss = 2.6008e-02, PNorm = 117.9318, GNorm = 0.2362, lr_0 = 6.9065e-04
Loss = 2.5510e-02, PNorm = 117.9805, GNorm = 0.3381, lr_0 = 6.9017e-04
Loss = 3.1306e-02, PNorm = 118.0344, GNorm = 0.2603, lr_0 = 6.8970e-04
Loss = 3.4872e-02, PNorm = 118.0969, GNorm = 0.3707, lr_0 = 6.8923e-04
Loss = 2.6905e-02, PNorm = 118.1591, GNorm = 0.5206, lr_0 = 6.8876e-04
Loss = 3.1532e-02, PNorm = 118.2164, GNorm = 0.2993, lr_0 = 6.8828e-04
Loss = 3.0555e-02, PNorm = 118.2652, GNorm = 0.6380, lr_0 = 6.8781e-04
Loss = 3.0578e-02, PNorm = 118.3218, GNorm = 0.6868, lr_0 = 6.8734e-04
Loss = 3.0711e-02, PNorm = 118.3777, GNorm = 0.4280, lr_0 = 6.8687e-04
Loss = 3.2610e-02, PNorm = 118.4378, GNorm = 0.4553, lr_0 = 6.8640e-04
Loss = 2.9720e-02, PNorm = 118.4979, GNorm = 0.4867, lr_0 = 6.8593e-04
Loss = 2.9189e-02, PNorm = 118.5586, GNorm = 0.2673, lr_0 = 6.8546e-04
Loss = 3.3588e-02, PNorm = 118.6178, GNorm = 0.4369, lr_0 = 6.8499e-04
Loss = 2.9653e-02, PNorm = 118.6772, GNorm = 0.4128, lr_0 = 6.8452e-04
Loss = 2.9228e-02, PNorm = 118.7376, GNorm = 0.8678, lr_0 = 6.8405e-04
Loss = 2.7390e-02, PNorm = 118.7958, GNorm = 0.2588, lr_0 = 6.8358e-04
Loss = 2.9698e-02, PNorm = 118.8538, GNorm = 0.4240, lr_0 = 6.8312e-04
Loss = 2.6603e-02, PNorm = 118.9107, GNorm = 0.4634, lr_0 = 6.8265e-04
Loss = 2.6260e-02, PNorm = 118.9598, GNorm = 0.2647, lr_0 = 6.8218e-04
Loss = 2.7025e-02, PNorm = 119.0135, GNorm = 0.7256, lr_0 = 6.8171e-04
Loss = 2.4544e-02, PNorm = 119.0667, GNorm = 0.8313, lr_0 = 6.8125e-04
Loss = 2.7123e-02, PNorm = 119.1147, GNorm = 0.2476, lr_0 = 6.8078e-04
Loss = 3.2056e-02, PNorm = 119.1634, GNorm = 0.5510, lr_0 = 6.8031e-04
Loss = 3.0738e-02, PNorm = 119.2169, GNorm = 0.8141, lr_0 = 6.7985e-04
Loss = 2.6867e-02, PNorm = 119.2708, GNorm = 0.1795, lr_0 = 6.7938e-04
Loss = 2.8739e-02, PNorm = 119.3268, GNorm = 0.7227, lr_0 = 6.7892e-04
Loss = 2.8497e-02, PNorm = 119.3836, GNorm = 0.4319, lr_0 = 6.7845e-04
Loss = 2.5217e-02, PNorm = 119.4379, GNorm = 0.6544, lr_0 = 6.7799e-04
Loss = 2.9993e-02, PNorm = 119.4975, GNorm = 0.5722, lr_0 = 6.7752e-04
Loss = 3.1937e-02, PNorm = 119.5590, GNorm = 0.3214, lr_0 = 6.7706e-04
Loss = 3.5098e-02, PNorm = 119.6210, GNorm = 0.7106, lr_0 = 6.7659e-04
Loss = 3.0012e-02, PNorm = 119.6815, GNorm = 0.3830, lr_0 = 6.7613e-04
Loss = 3.2658e-02, PNorm = 119.7486, GNorm = 0.5115, lr_0 = 6.7567e-04
Loss = 3.0135e-02, PNorm = 119.8083, GNorm = 0.4868, lr_0 = 6.7520e-04
Loss = 3.2720e-02, PNorm = 119.8712, GNorm = 0.5146, lr_0 = 6.7474e-04
Loss = 2.7958e-02, PNorm = 119.9325, GNorm = 0.1762, lr_0 = 6.7428e-04
Loss = 2.5609e-02, PNorm = 119.9876, GNorm = 0.3296, lr_0 = 6.7382e-04
Loss = 2.6702e-02, PNorm = 120.0378, GNorm = 0.1789, lr_0 = 6.7335e-04
Loss = 2.8645e-02, PNorm = 120.0941, GNorm = 0.3901, lr_0 = 6.7289e-04
Loss = 3.3564e-02, PNorm = 120.1455, GNorm = 0.2292, lr_0 = 6.7243e-04
Loss = 2.6636e-02, PNorm = 120.1991, GNorm = 0.3944, lr_0 = 6.7197e-04
Loss = 3.4583e-02, PNorm = 120.2568, GNorm = 0.6862, lr_0 = 6.7151e-04
Loss = 2.9265e-02, PNorm = 120.3255, GNorm = 0.3513, lr_0 = 6.7105e-04
Loss = 3.1387e-02, PNorm = 120.3867, GNorm = 0.8903, lr_0 = 6.7059e-04
Loss = 3.2435e-02, PNorm = 120.4509, GNorm = 0.4641, lr_0 = 6.7013e-04
Loss = 2.8327e-02, PNorm = 120.5126, GNorm = 0.2194, lr_0 = 6.6967e-04
Loss = 2.8384e-02, PNorm = 120.5760, GNorm = 0.3113, lr_0 = 6.6921e-04
Loss = 3.2168e-02, PNorm = 120.6352, GNorm = 0.4262, lr_0 = 6.6876e-04
Loss = 2.7371e-02, PNorm = 120.6944, GNorm = 0.7451, lr_0 = 6.6830e-04
Loss = 2.9022e-02, PNorm = 120.7527, GNorm = 0.2734, lr_0 = 6.6784e-04
Loss = 3.0131e-02, PNorm = 120.8098, GNorm = 0.7027, lr_0 = 6.6738e-04
Loss = 2.8735e-02, PNorm = 120.8694, GNorm = 0.3263, lr_0 = 6.6693e-04
Loss = 3.1010e-02, PNorm = 120.9232, GNorm = 0.6322, lr_0 = 6.6647e-04
Loss = 3.4190e-02, PNorm = 120.9786, GNorm = 0.4324, lr_0 = 6.6601e-04
Loss = 3.3739e-02, PNorm = 121.0345, GNorm = 0.3920, lr_0 = 6.6556e-04
Loss = 3.1471e-02, PNorm = 121.1028, GNorm = 0.4997, lr_0 = 6.6510e-04
Loss = 3.5177e-02, PNorm = 121.1642, GNorm = 0.3228, lr_0 = 6.6464e-04
Loss = 3.9246e-02, PNorm = 121.2301, GNorm = 1.0228, lr_0 = 6.6419e-04
Loss = 3.0557e-02, PNorm = 121.2891, GNorm = 0.6464, lr_0 = 6.6373e-04
Loss = 3.3663e-02, PNorm = 121.3582, GNorm = 0.3649, lr_0 = 6.6328e-04
Loss = 4.7219e-02, PNorm = 121.4182, GNorm = 0.6435, lr_0 = 6.6282e-04
Validation mae = 0.489859
Epoch 7
Loss = 2.5402e-02, PNorm = 121.4825, GNorm = 0.2103, lr_0 = 6.6237e-04
Loss = 2.4895e-02, PNorm = 121.5246, GNorm = 0.4801, lr_0 = 6.6192e-04
Loss = 2.5610e-02, PNorm = 121.5698, GNorm = 0.1901, lr_0 = 6.6146e-04
Loss = 2.3894e-02, PNorm = 121.6166, GNorm = 0.3197, lr_0 = 6.6101e-04
Loss = 2.4894e-02, PNorm = 121.6592, GNorm = 0.3979, lr_0 = 6.6056e-04
Loss = 2.9489e-02, PNorm = 121.7027, GNorm = 0.5767, lr_0 = 6.6011e-04
Loss = 2.6739e-02, PNorm = 121.7374, GNorm = 0.4026, lr_0 = 6.5965e-04
Loss = 2.2381e-02, PNorm = 121.7799, GNorm = 0.2726, lr_0 = 6.5920e-04
Loss = 2.7060e-02, PNorm = 121.8277, GNorm = 0.1508, lr_0 = 6.5875e-04
Loss = 2.4476e-02, PNorm = 121.8736, GNorm = 0.7115, lr_0 = 6.5830e-04
Loss = 2.4492e-02, PNorm = 121.9117, GNorm = 0.3823, lr_0 = 6.5785e-04
Loss = 2.0426e-02, PNorm = 121.9499, GNorm = 0.2077, lr_0 = 6.5740e-04
Loss = 2.3460e-02, PNorm = 121.9875, GNorm = 0.2053, lr_0 = 6.5695e-04
Loss = 2.4013e-02, PNorm = 122.0239, GNorm = 0.1875, lr_0 = 6.5650e-04
Loss = 3.1543e-02, PNorm = 122.0645, GNorm = 0.3400, lr_0 = 6.5605e-04
Loss = 2.0842e-02, PNorm = 122.1065, GNorm = 0.1768, lr_0 = 6.5560e-04
Loss = 2.4621e-02, PNorm = 122.1554, GNorm = 0.6688, lr_0 = 6.5515e-04
Loss = 2.6048e-02, PNorm = 122.2035, GNorm = 0.2218, lr_0 = 6.5470e-04
Loss = 2.2939e-02, PNorm = 122.2485, GNorm = 0.2635, lr_0 = 6.5425e-04
Loss = 2.1528e-02, PNorm = 122.2955, GNorm = 0.3078, lr_0 = 6.5380e-04
Loss = 2.2611e-02, PNorm = 122.3369, GNorm = 0.1891, lr_0 = 6.5335e-04
Loss = 2.5278e-02, PNorm = 122.3773, GNorm = 0.2382, lr_0 = 6.5291e-04
Loss = 2.3461e-02, PNorm = 122.4189, GNorm = 0.4058, lr_0 = 6.5246e-04
Loss = 2.1952e-02, PNorm = 122.4579, GNorm = 0.4839, lr_0 = 6.5201e-04
Loss = 2.0915e-02, PNorm = 122.4944, GNorm = 0.5744, lr_0 = 6.5157e-04
Loss = 2.1683e-02, PNorm = 122.5370, GNorm = 0.3852, lr_0 = 6.5112e-04
Loss = 2.3033e-02, PNorm = 122.5779, GNorm = 0.4421, lr_0 = 6.5067e-04
Loss = 2.0159e-02, PNorm = 122.6187, GNorm = 0.4484, lr_0 = 6.5023e-04
Loss = 2.2302e-02, PNorm = 122.6553, GNorm = 1.0592, lr_0 = 6.4978e-04
Loss = 2.3042e-02, PNorm = 122.6913, GNorm = 0.3821, lr_0 = 6.4934e-04
Loss = 2.3788e-02, PNorm = 122.7328, GNorm = 0.2360, lr_0 = 6.4889e-04
Loss = 2.1210e-02, PNorm = 122.7768, GNorm = 0.6860, lr_0 = 6.4845e-04
Loss = 2.1680e-02, PNorm = 122.8190, GNorm = 0.2513, lr_0 = 6.4800e-04
Loss = 2.1108e-02, PNorm = 122.8614, GNorm = 0.2561, lr_0 = 6.4756e-04
Loss = 2.9137e-02, PNorm = 122.9003, GNorm = 0.2140, lr_0 = 6.4712e-04
Loss = 1.9442e-02, PNorm = 122.9444, GNorm = 0.3204, lr_0 = 6.4667e-04
Loss = 2.2005e-02, PNorm = 122.9919, GNorm = 0.4648, lr_0 = 6.4623e-04
Loss = 2.5883e-02, PNorm = 123.0319, GNorm = 0.3012, lr_0 = 6.4579e-04
Loss = 2.3110e-02, PNorm = 123.0765, GNorm = 0.4297, lr_0 = 6.4534e-04
Loss = 2.1126e-02, PNorm = 123.1238, GNorm = 0.3678, lr_0 = 6.4490e-04
Loss = 1.9150e-02, PNorm = 123.1689, GNorm = 0.2933, lr_0 = 6.4446e-04
Loss = 2.2330e-02, PNorm = 123.2097, GNorm = 0.2771, lr_0 = 6.4402e-04
Loss = 2.2122e-02, PNorm = 123.2505, GNorm = 0.3795, lr_0 = 6.4358e-04
Loss = 1.9149e-02, PNorm = 123.2958, GNorm = 0.3612, lr_0 = 6.4314e-04
Loss = 2.2024e-02, PNorm = 123.3345, GNorm = 0.2776, lr_0 = 6.4270e-04
Loss = 2.1332e-02, PNorm = 123.3797, GNorm = 0.2787, lr_0 = 6.4226e-04
Loss = 1.8815e-02, PNorm = 123.4212, GNorm = 0.2356, lr_0 = 6.4182e-04
Loss = 2.1260e-02, PNorm = 123.4570, GNorm = 0.1625, lr_0 = 6.4138e-04
Loss = 2.3112e-02, PNorm = 123.4996, GNorm = 0.6711, lr_0 = 6.4094e-04
Loss = 1.9338e-02, PNorm = 123.5445, GNorm = 0.3359, lr_0 = 6.4050e-04
Loss = 1.7241e-02, PNorm = 123.5842, GNorm = 0.2554, lr_0 = 6.4006e-04
Loss = 2.0513e-02, PNorm = 123.6229, GNorm = 0.1453, lr_0 = 6.3962e-04
Loss = 2.4984e-02, PNorm = 123.6563, GNorm = 0.6596, lr_0 = 6.3918e-04
Loss = 2.5653e-02, PNorm = 123.7034, GNorm = 0.3195, lr_0 = 6.3874e-04
Loss = 2.3709e-02, PNorm = 123.7488, GNorm = 0.3496, lr_0 = 6.3831e-04
Loss = 2.2715e-02, PNorm = 123.7967, GNorm = 0.5936, lr_0 = 6.3787e-04
Loss = 2.3850e-02, PNorm = 123.8430, GNorm = 0.3004, lr_0 = 6.3743e-04
Loss = 2.4826e-02, PNorm = 123.8908, GNorm = 0.3787, lr_0 = 6.3700e-04
Loss = 2.1474e-02, PNorm = 123.9363, GNorm = 0.3484, lr_0 = 6.3656e-04
Loss = 2.2681e-02, PNorm = 123.9849, GNorm = 0.1831, lr_0 = 6.3612e-04
Loss = 2.1452e-02, PNorm = 124.0315, GNorm = 0.4559, lr_0 = 6.3569e-04
Loss = 2.3473e-02, PNorm = 124.0762, GNorm = 0.3655, lr_0 = 6.3525e-04
Loss = 2.7697e-02, PNorm = 124.1221, GNorm = 0.2538, lr_0 = 6.3482e-04
Loss = 2.4299e-02, PNorm = 124.1733, GNorm = 0.3575, lr_0 = 6.3438e-04
Loss = 2.4475e-02, PNorm = 124.2195, GNorm = 0.2528, lr_0 = 6.3395e-04
Loss = 2.3414e-02, PNorm = 124.2720, GNorm = 0.1932, lr_0 = 6.3351e-04
Loss = 2.2661e-02, PNorm = 124.3234, GNorm = 0.4125, lr_0 = 6.3308e-04
Loss = 2.2065e-02, PNorm = 124.3722, GNorm = 0.4751, lr_0 = 6.3265e-04
Loss = 2.5669e-02, PNorm = 124.4180, GNorm = 0.2103, lr_0 = 6.3221e-04
Loss = 2.1010e-02, PNorm = 124.4672, GNorm = 0.5987, lr_0 = 6.3178e-04
Loss = 2.0254e-02, PNorm = 124.5090, GNorm = 0.2169, lr_0 = 6.3135e-04
Loss = 2.4598e-02, PNorm = 124.5556, GNorm = 0.5793, lr_0 = 6.3091e-04
Loss = 2.3254e-02, PNorm = 124.6088, GNorm = 0.3522, lr_0 = 6.3048e-04
Loss = 2.1742e-02, PNorm = 124.6558, GNorm = 0.5307, lr_0 = 6.3005e-04
Loss = 2.3103e-02, PNorm = 124.6933, GNorm = 0.5168, lr_0 = 6.2962e-04
Loss = 2.1899e-02, PNorm = 124.7310, GNorm = 0.5800, lr_0 = 6.2919e-04
Loss = 2.2955e-02, PNorm = 124.7762, GNorm = 0.2258, lr_0 = 6.2876e-04
Loss = 2.2502e-02, PNorm = 124.8253, GNorm = 0.1887, lr_0 = 6.2833e-04
Loss = 1.8880e-02, PNorm = 124.8724, GNorm = 0.1685, lr_0 = 6.2789e-04
Loss = 2.4083e-02, PNorm = 124.9134, GNorm = 0.3051, lr_0 = 6.2746e-04
Loss = 1.7215e-02, PNorm = 124.9582, GNorm = 0.1981, lr_0 = 6.2703e-04
Loss = 1.9912e-02, PNorm = 125.0029, GNorm = 0.3666, lr_0 = 6.2661e-04
Loss = 2.5515e-02, PNorm = 125.0408, GNorm = 0.2431, lr_0 = 6.2618e-04
Loss = 2.1068e-02, PNorm = 125.0918, GNorm = 0.4668, lr_0 = 6.2575e-04
Loss = 2.7369e-02, PNorm = 125.1336, GNorm = 0.6866, lr_0 = 6.2532e-04
Loss = 2.0869e-02, PNorm = 125.1791, GNorm = 0.2574, lr_0 = 6.2489e-04
Loss = 2.6467e-02, PNorm = 125.2185, GNorm = 0.4995, lr_0 = 6.2446e-04
Loss = 2.1701e-02, PNorm = 125.2643, GNorm = 0.4340, lr_0 = 6.2403e-04
Loss = 2.4410e-02, PNorm = 125.3115, GNorm = 0.7928, lr_0 = 6.2361e-04
Loss = 2.2218e-02, PNorm = 125.3558, GNorm = 0.3089, lr_0 = 6.2318e-04
Loss = 2.2373e-02, PNorm = 125.4074, GNorm = 0.2312, lr_0 = 6.2275e-04
Loss = 2.1964e-02, PNorm = 125.4538, GNorm = 0.2266, lr_0 = 6.2233e-04
Loss = 2.2414e-02, PNorm = 125.4988, GNorm = 0.4579, lr_0 = 6.2190e-04
Loss = 2.3978e-02, PNorm = 125.5402, GNorm = 0.5749, lr_0 = 6.2147e-04
Loss = 2.2036e-02, PNorm = 125.5844, GNorm = 0.2358, lr_0 = 6.2105e-04
Loss = 2.2700e-02, PNorm = 125.6234, GNorm = 0.6092, lr_0 = 6.2062e-04
Loss = 2.1849e-02, PNorm = 125.6688, GNorm = 0.1597, lr_0 = 6.2020e-04
Loss = 2.6321e-02, PNorm = 125.7105, GNorm = 0.3417, lr_0 = 6.1977e-04
Loss = 2.4030e-02, PNorm = 125.7586, GNorm = 0.6655, lr_0 = 6.1935e-04
Loss = 2.3663e-02, PNorm = 125.8109, GNorm = 0.7418, lr_0 = 6.1892e-04
Loss = 2.6579e-02, PNorm = 125.8677, GNorm = 0.5779, lr_0 = 6.1850e-04
Loss = 2.4968e-02, PNorm = 125.9283, GNorm = 0.6803, lr_0 = 6.1808e-04
Loss = 2.2289e-02, PNorm = 125.9817, GNorm = 0.3005, lr_0 = 6.1765e-04
Loss = 2.3605e-02, PNorm = 126.0335, GNorm = 0.4211, lr_0 = 6.1723e-04
Loss = 2.4801e-02, PNorm = 126.0872, GNorm = 0.4654, lr_0 = 6.1681e-04
Loss = 2.8017e-02, PNorm = 126.1406, GNorm = 0.3396, lr_0 = 6.1638e-04
Loss = 2.5511e-02, PNorm = 126.1926, GNorm = 0.2639, lr_0 = 6.1596e-04
Loss = 2.3148e-02, PNorm = 126.2442, GNorm = 0.3672, lr_0 = 6.1554e-04
Loss = 2.2122e-02, PNorm = 126.2920, GNorm = 0.3060, lr_0 = 6.1512e-04
Loss = 2.6069e-02, PNorm = 126.3413, GNorm = 0.4784, lr_0 = 6.1470e-04
Loss = 3.1171e-02, PNorm = 126.3945, GNorm = 1.5660, lr_0 = 6.1428e-04
Loss = 2.8435e-02, PNorm = 126.4397, GNorm = 0.5410, lr_0 = 6.1385e-04
Loss = 2.5407e-02, PNorm = 126.4904, GNorm = 0.2402, lr_0 = 6.1343e-04
Loss = 2.5417e-02, PNorm = 126.5456, GNorm = 0.7966, lr_0 = 6.1301e-04
Loss = 2.1905e-02, PNorm = 126.5945, GNorm = 0.2371, lr_0 = 6.1259e-04
Loss = 2.5675e-02, PNorm = 126.6458, GNorm = 0.5924, lr_0 = 6.1217e-04
Loss = 2.4886e-02, PNorm = 126.6972, GNorm = 0.3483, lr_0 = 6.1175e-04
Loss = 2.4053e-02, PNorm = 126.7515, GNorm = 0.2367, lr_0 = 6.1134e-04
Loss = 2.2503e-02, PNorm = 126.8062, GNorm = 0.2561, lr_0 = 6.1092e-04
Loss = 2.6838e-02, PNorm = 126.8598, GNorm = 0.2577, lr_0 = 6.1050e-04
Validation mae = 0.487498
Epoch 8
Loss = 2.1361e-02, PNorm = 126.8963, GNorm = 0.2055, lr_0 = 6.1008e-04
Loss = 2.2051e-02, PNorm = 126.9283, GNorm = 0.3463, lr_0 = 6.0966e-04
Loss = 1.8071e-02, PNorm = 126.9629, GNorm = 0.3104, lr_0 = 6.0924e-04
Loss = 1.7569e-02, PNorm = 126.9933, GNorm = 0.4947, lr_0 = 6.0883e-04
Loss = 2.2153e-02, PNorm = 127.0244, GNorm = 0.2942, lr_0 = 6.0841e-04
Loss = 2.1217e-02, PNorm = 127.0550, GNorm = 0.1738, lr_0 = 6.0799e-04
Loss = 1.8659e-02, PNorm = 127.0848, GNorm = 0.2082, lr_0 = 6.0758e-04
Loss = 2.5525e-02, PNorm = 127.1173, GNorm = 0.2822, lr_0 = 6.0716e-04
Loss = 2.0649e-02, PNorm = 127.1463, GNorm = 0.2885, lr_0 = 6.0674e-04
Loss = 2.0489e-02, PNorm = 127.1779, GNorm = 0.3031, lr_0 = 6.0633e-04
Loss = 1.8719e-02, PNorm = 127.2188, GNorm = 0.3693, lr_0 = 6.0591e-04
Loss = 2.0288e-02, PNorm = 127.2532, GNorm = 0.3995, lr_0 = 6.0550e-04
Loss = 1.8719e-02, PNorm = 127.2857, GNorm = 0.3084, lr_0 = 6.0508e-04
Loss = 1.9129e-02, PNorm = 127.3213, GNorm = 0.1214, lr_0 = 6.0467e-04
Loss = 1.6666e-02, PNorm = 127.3541, GNorm = 0.2974, lr_0 = 6.0425e-04
Loss = 1.7635e-02, PNorm = 127.3888, GNorm = 0.3404, lr_0 = 6.0384e-04
Loss = 1.6757e-02, PNorm = 127.4262, GNorm = 0.2912, lr_0 = 6.0343e-04
Loss = 1.9757e-02, PNorm = 127.4637, GNorm = 0.4373, lr_0 = 6.0301e-04
Loss = 1.5987e-02, PNorm = 127.5002, GNorm = 0.2345, lr_0 = 6.0260e-04
Loss = 2.0479e-02, PNorm = 127.5306, GNorm = 0.3581, lr_0 = 6.0219e-04
Loss = 1.8069e-02, PNorm = 127.5638, GNorm = 0.2233, lr_0 = 6.0178e-04
Loss = 1.6501e-02, PNorm = 127.5978, GNorm = 0.2215, lr_0 = 6.0136e-04
Loss = 1.6123e-02, PNorm = 127.6267, GNorm = 0.2687, lr_0 = 6.0095e-04
Loss = 1.9154e-02, PNorm = 127.6647, GNorm = 0.3620, lr_0 = 6.0054e-04
Loss = 1.9377e-02, PNorm = 127.7029, GNorm = 0.1652, lr_0 = 6.0013e-04
Loss = 1.7463e-02, PNorm = 127.7375, GNorm = 0.2260, lr_0 = 5.9972e-04
Loss = 1.5064e-02, PNorm = 127.7729, GNorm = 0.4911, lr_0 = 5.9931e-04
Loss = 1.4631e-02, PNorm = 127.8051, GNorm = 0.2556, lr_0 = 5.9890e-04
Loss = 1.7654e-02, PNorm = 127.8386, GNorm = 0.2262, lr_0 = 5.9849e-04
Loss = 1.6916e-02, PNorm = 127.8687, GNorm = 0.2206, lr_0 = 5.9808e-04
Loss = 1.7441e-02, PNorm = 127.9033, GNorm = 0.1723, lr_0 = 5.9767e-04
Loss = 1.5698e-02, PNorm = 127.9378, GNorm = 0.4209, lr_0 = 5.9726e-04
Loss = 1.6716e-02, PNorm = 127.9705, GNorm = 0.2235, lr_0 = 5.9685e-04
Loss = 1.7423e-02, PNorm = 128.0028, GNorm = 0.1341, lr_0 = 5.9644e-04
Loss = 1.4808e-02, PNorm = 128.0325, GNorm = 0.2839, lr_0 = 5.9603e-04
Loss = 1.8422e-02, PNorm = 128.0619, GNorm = 0.2689, lr_0 = 5.9562e-04
Loss = 1.8620e-02, PNorm = 128.0977, GNorm = 0.3526, lr_0 = 5.9521e-04
Loss = 1.8747e-02, PNorm = 128.1397, GNorm = 0.3219, lr_0 = 5.9481e-04
Loss = 1.4335e-02, PNorm = 128.1751, GNorm = 0.2037, lr_0 = 5.9440e-04
Loss = 1.5957e-02, PNorm = 128.2083, GNorm = 0.1808, lr_0 = 5.9399e-04
Loss = 1.6679e-02, PNorm = 128.2424, GNorm = 0.2853, lr_0 = 5.9358e-04
Loss = 1.8296e-02, PNorm = 128.2752, GNorm = 0.1920, lr_0 = 5.9318e-04
Loss = 1.7900e-02, PNorm = 128.3052, GNorm = 0.6549, lr_0 = 5.9277e-04
Loss = 1.7194e-02, PNorm = 128.3387, GNorm = 0.5086, lr_0 = 5.9236e-04
Loss = 1.4510e-02, PNorm = 128.3774, GNorm = 0.2042, lr_0 = 5.9196e-04
Loss = 1.5806e-02, PNorm = 128.4112, GNorm = 0.2489, lr_0 = 5.9155e-04
Loss = 1.7123e-02, PNorm = 128.4465, GNorm = 0.2469, lr_0 = 5.9115e-04
Loss = 1.8401e-02, PNorm = 128.4830, GNorm = 0.4685, lr_0 = 5.9074e-04
Loss = 1.6259e-02, PNorm = 128.5172, GNorm = 0.6372, lr_0 = 5.9034e-04
Loss = 1.6869e-02, PNorm = 128.5450, GNorm = 0.3549, lr_0 = 5.8993e-04
Loss = 1.7336e-02, PNorm = 128.5788, GNorm = 0.1861, lr_0 = 5.8953e-04
Loss = 1.6020e-02, PNorm = 128.6087, GNorm = 0.1796, lr_0 = 5.8913e-04
Loss = 1.7477e-02, PNorm = 128.6449, GNorm = 0.4090, lr_0 = 5.8872e-04
Loss = 1.6751e-02, PNorm = 128.6865, GNorm = 0.2668, lr_0 = 5.8832e-04
Loss = 1.8980e-02, PNorm = 128.7273, GNorm = 0.2572, lr_0 = 5.8792e-04
Loss = 2.0789e-02, PNorm = 128.7595, GNorm = 0.1903, lr_0 = 5.8751e-04
Loss = 1.6341e-02, PNorm = 128.7996, GNorm = 0.4704, lr_0 = 5.8711e-04
Loss = 1.8556e-02, PNorm = 128.8410, GNorm = 0.3750, lr_0 = 5.8671e-04
Loss = 1.8004e-02, PNorm = 128.8801, GNorm = 0.7210, lr_0 = 5.8631e-04
Loss = 1.9311e-02, PNorm = 128.9216, GNorm = 0.1556, lr_0 = 5.8591e-04
Loss = 1.8646e-02, PNorm = 128.9666, GNorm = 0.2189, lr_0 = 5.8550e-04
Loss = 1.7316e-02, PNorm = 129.0060, GNorm = 0.3106, lr_0 = 5.8510e-04
Loss = 1.8063e-02, PNorm = 129.0421, GNorm = 0.5609, lr_0 = 5.8470e-04
Loss = 1.7054e-02, PNorm = 129.0836, GNorm = 0.4227, lr_0 = 5.8430e-04
Loss = 1.6533e-02, PNorm = 129.1222, GNorm = 0.3136, lr_0 = 5.8390e-04
Loss = 1.6737e-02, PNorm = 129.1599, GNorm = 0.5531, lr_0 = 5.8350e-04
Loss = 2.0965e-02, PNorm = 129.1936, GNorm = 0.4161, lr_0 = 5.8310e-04
Loss = 1.9363e-02, PNorm = 129.2361, GNorm = 0.5498, lr_0 = 5.8270e-04
Loss = 1.6509e-02, PNorm = 129.2756, GNorm = 0.2070, lr_0 = 5.8230e-04
Loss = 1.4536e-02, PNorm = 129.3136, GNorm = 0.6660, lr_0 = 5.8190e-04
Loss = 1.9448e-02, PNorm = 129.3544, GNorm = 0.2921, lr_0 = 5.8151e-04
Loss = 1.5817e-02, PNorm = 129.3971, GNorm = 0.5334, lr_0 = 5.8111e-04
Loss = 1.7443e-02, PNorm = 129.4392, GNorm = 0.2971, lr_0 = 5.8071e-04
Loss = 1.8350e-02, PNorm = 129.4738, GNorm = 0.3141, lr_0 = 5.8031e-04
Loss = 1.8983e-02, PNorm = 129.5112, GNorm = 0.3366, lr_0 = 5.7991e-04
Loss = 2.0441e-02, PNorm = 129.5494, GNorm = 0.2538, lr_0 = 5.7952e-04
Loss = 2.0652e-02, PNorm = 129.5980, GNorm = 0.3697, lr_0 = 5.7912e-04
Loss = 1.9121e-02, PNorm = 129.6447, GNorm = 0.1761, lr_0 = 5.7872e-04
Loss = 2.0072e-02, PNorm = 129.6971, GNorm = 0.5199, lr_0 = 5.7833e-04
Loss = 1.9069e-02, PNorm = 129.7388, GNorm = 0.4983, lr_0 = 5.7793e-04
Loss = 1.7329e-02, PNorm = 129.7772, GNorm = 0.1475, lr_0 = 5.7753e-04
Loss = 1.5548e-02, PNorm = 129.8161, GNorm = 0.3337, lr_0 = 5.7714e-04
Loss = 1.7646e-02, PNorm = 129.8513, GNorm = 0.3079, lr_0 = 5.7674e-04
Loss = 1.6173e-02, PNorm = 129.8891, GNorm = 0.3026, lr_0 = 5.7635e-04
Loss = 1.9097e-02, PNorm = 129.9344, GNorm = 0.5217, lr_0 = 5.7595e-04
Loss = 1.8029e-02, PNorm = 129.9801, GNorm = 0.2633, lr_0 = 5.7556e-04
Loss = 1.8022e-02, PNorm = 130.0200, GNorm = 0.4214, lr_0 = 5.7516e-04
Loss = 2.0203e-02, PNorm = 130.0520, GNorm = 0.1708, lr_0 = 5.7477e-04
Loss = 2.0501e-02, PNorm = 130.0900, GNorm = 0.3594, lr_0 = 5.7438e-04
Loss = 2.0161e-02, PNorm = 130.1383, GNorm = 0.2843, lr_0 = 5.7398e-04
Loss = 1.7994e-02, PNorm = 130.1860, GNorm = 0.4149, lr_0 = 5.7359e-04
Loss = 1.7574e-02, PNorm = 130.2310, GNorm = 0.2979, lr_0 = 5.7320e-04
Loss = 1.9925e-02, PNorm = 130.2742, GNorm = 0.2463, lr_0 = 5.7280e-04
Loss = 1.8246e-02, PNorm = 130.3218, GNorm = 0.3291, lr_0 = 5.7241e-04
Loss = 1.5876e-02, PNorm = 130.3619, GNorm = 0.4827, lr_0 = 5.7202e-04
Loss = 1.8954e-02, PNorm = 130.4034, GNorm = 0.6632, lr_0 = 5.7163e-04
Loss = 2.0346e-02, PNorm = 130.4440, GNorm = 0.4624, lr_0 = 5.7124e-04
Loss = 2.3035e-02, PNorm = 130.4886, GNorm = 1.0128, lr_0 = 5.7084e-04
Loss = 2.1477e-02, PNorm = 130.5317, GNorm = 0.7546, lr_0 = 5.7045e-04
Loss = 1.8270e-02, PNorm = 130.5809, GNorm = 0.2243, lr_0 = 5.7006e-04
Loss = 1.7839e-02, PNorm = 130.6275, GNorm = 0.2681, lr_0 = 5.6967e-04
Loss = 1.9563e-02, PNorm = 130.6668, GNorm = 0.4469, lr_0 = 5.6928e-04
Loss = 1.8512e-02, PNorm = 130.7080, GNorm = 0.3608, lr_0 = 5.6889e-04
Loss = 1.7526e-02, PNorm = 130.7432, GNorm = 0.3762, lr_0 = 5.6850e-04
Loss = 2.0235e-02, PNorm = 130.7876, GNorm = 0.4123, lr_0 = 5.6811e-04
Loss = 2.3349e-02, PNorm = 130.8258, GNorm = 0.3302, lr_0 = 5.6772e-04
Loss = 2.0559e-02, PNorm = 130.8708, GNorm = 0.1407, lr_0 = 5.6733e-04
Loss = 1.7179e-02, PNorm = 130.9120, GNorm = 0.2065, lr_0 = 5.6695e-04
Loss = 1.6644e-02, PNorm = 130.9521, GNorm = 0.3692, lr_0 = 5.6656e-04
Loss = 1.8204e-02, PNorm = 130.9925, GNorm = 0.2333, lr_0 = 5.6617e-04
Loss = 1.7418e-02, PNorm = 131.0283, GNorm = 0.3132, lr_0 = 5.6578e-04
Loss = 2.2459e-02, PNorm = 131.0710, GNorm = 0.4535, lr_0 = 5.6539e-04
Loss = 1.7614e-02, PNorm = 131.1077, GNorm = 0.3086, lr_0 = 5.6501e-04
Loss = 1.7575e-02, PNorm = 131.1515, GNorm = 0.3308, lr_0 = 5.6462e-04
Loss = 1.6632e-02, PNorm = 131.1879, GNorm = 0.3261, lr_0 = 5.6423e-04
Loss = 2.3198e-02, PNorm = 131.2259, GNorm = 0.4575, lr_0 = 5.6385e-04
Loss = 1.7221e-02, PNorm = 131.2631, GNorm = 0.1907, lr_0 = 5.6346e-04
Loss = 1.5541e-02, PNorm = 131.3024, GNorm = 0.1947, lr_0 = 5.6307e-04
Loss = 1.7977e-02, PNorm = 131.3420, GNorm = 0.2771, lr_0 = 5.6269e-04
Loss = 1.9431e-02, PNorm = 131.3784, GNorm = 0.4025, lr_0 = 5.6230e-04
Validation mae = 0.486352
Epoch 9
Loss = 1.5221e-02, PNorm = 131.4107, GNorm = 0.3801, lr_0 = 5.6192e-04
Loss = 1.5083e-02, PNorm = 131.4362, GNorm = 0.2959, lr_0 = 5.6153e-04
Loss = 1.3121e-02, PNorm = 131.4614, GNorm = 0.1560, lr_0 = 5.6115e-04
Loss = 1.7116e-02, PNorm = 131.4876, GNorm = 0.5818, lr_0 = 5.6076e-04
Loss = 1.3130e-02, PNorm = 131.5129, GNorm = 0.4194, lr_0 = 5.6038e-04
Loss = 1.5939e-02, PNorm = 131.5390, GNorm = 0.3221, lr_0 = 5.6000e-04
Loss = 2.1806e-02, PNorm = 131.5607, GNorm = 0.3661, lr_0 = 5.5961e-04
Loss = 1.5685e-02, PNorm = 131.5908, GNorm = 0.1233, lr_0 = 5.5923e-04
Loss = 1.4237e-02, PNorm = 131.6171, GNorm = 0.4524, lr_0 = 5.5885e-04
Loss = 1.5662e-02, PNorm = 131.6453, GNorm = 0.1542, lr_0 = 5.5846e-04
Loss = 1.4103e-02, PNorm = 131.6725, GNorm = 0.1067, lr_0 = 5.5808e-04
Loss = 1.3874e-02, PNorm = 131.6995, GNorm = 0.3694, lr_0 = 5.5770e-04
Loss = 1.5546e-02, PNorm = 131.7327, GNorm = 0.3033, lr_0 = 5.5732e-04
Loss = 1.6142e-02, PNorm = 131.7663, GNorm = 0.2196, lr_0 = 5.5693e-04
Loss = 1.5309e-02, PNorm = 131.7963, GNorm = 0.1854, lr_0 = 5.5655e-04
Loss = 1.2988e-02, PNorm = 131.8320, GNorm = 0.4695, lr_0 = 5.5617e-04
Loss = 1.5661e-02, PNorm = 131.8587, GNorm = 0.2557, lr_0 = 5.5579e-04
Loss = 1.2287e-02, PNorm = 131.8807, GNorm = 0.2577, lr_0 = 5.5541e-04
Loss = 1.4339e-02, PNorm = 131.9052, GNorm = 0.3286, lr_0 = 5.5503e-04
Loss = 1.5877e-02, PNorm = 131.9349, GNorm = 0.3778, lr_0 = 5.5465e-04
Loss = 1.2713e-02, PNorm = 131.9666, GNorm = 0.2607, lr_0 = 5.5427e-04
Loss = 1.5179e-02, PNorm = 131.9968, GNorm = 0.3159, lr_0 = 5.5389e-04
Loss = 1.4735e-02, PNorm = 132.0309, GNorm = 0.1213, lr_0 = 5.5351e-04
Loss = 1.3350e-02, PNorm = 132.0678, GNorm = 0.5786, lr_0 = 5.5313e-04
Loss = 1.4797e-02, PNorm = 132.0992, GNorm = 0.6510, lr_0 = 5.5275e-04
Loss = 1.3267e-02, PNorm = 132.1284, GNorm = 0.4230, lr_0 = 5.5237e-04
Loss = 1.6055e-02, PNorm = 132.1586, GNorm = 0.5876, lr_0 = 5.5199e-04
Loss = 1.3157e-02, PNorm = 132.1916, GNorm = 0.3912, lr_0 = 5.5162e-04
Loss = 1.4481e-02, PNorm = 132.2254, GNorm = 0.1585, lr_0 = 5.5124e-04
Loss = 1.5133e-02, PNorm = 132.2536, GNorm = 0.4186, lr_0 = 5.5086e-04
Loss = 1.3187e-02, PNorm = 132.2826, GNorm = 0.2657, lr_0 = 5.5048e-04
Loss = 1.5876e-02, PNorm = 132.3098, GNorm = 0.8853, lr_0 = 5.5011e-04
Loss = 1.5006e-02, PNorm = 132.3360, GNorm = 0.5268, lr_0 = 5.4973e-04
Loss = 1.3624e-02, PNorm = 132.3674, GNorm = 0.3023, lr_0 = 5.4935e-04
Loss = 1.7922e-02, PNorm = 132.3954, GNorm = 0.3997, lr_0 = 5.4898e-04
Loss = 1.5469e-02, PNorm = 132.4290, GNorm = 0.2419, lr_0 = 5.4860e-04
Loss = 1.3242e-02, PNorm = 132.4603, GNorm = 0.2713, lr_0 = 5.4822e-04
Loss = 1.5065e-02, PNorm = 132.4922, GNorm = 0.4818, lr_0 = 5.4785e-04
Loss = 1.5118e-02, PNorm = 132.5227, GNorm = 0.2540, lr_0 = 5.4747e-04
Loss = 1.3008e-02, PNorm = 132.5542, GNorm = 0.5255, lr_0 = 5.4710e-04
Loss = 1.3023e-02, PNorm = 132.5856, GNorm = 0.3808, lr_0 = 5.4672e-04
Loss = 1.2995e-02, PNorm = 132.6206, GNorm = 0.1784, lr_0 = 5.4635e-04
Loss = 1.3626e-02, PNorm = 132.6517, GNorm = 0.1986, lr_0 = 5.4597e-04
Loss = 1.1564e-02, PNorm = 132.6793, GNorm = 0.2394, lr_0 = 5.4560e-04
Loss = 1.4998e-02, PNorm = 132.7058, GNorm = 0.2371, lr_0 = 5.4523e-04
Loss = 1.5029e-02, PNorm = 132.7375, GNorm = 0.4277, lr_0 = 5.4485e-04
Loss = 1.2361e-02, PNorm = 132.7665, GNorm = 0.2030, lr_0 = 5.4448e-04
Loss = 1.4121e-02, PNorm = 132.7932, GNorm = 0.5623, lr_0 = 5.4411e-04
Loss = 1.5844e-02, PNorm = 132.8240, GNorm = 0.2838, lr_0 = 5.4373e-04
Loss = 1.7434e-02, PNorm = 132.8487, GNorm = 0.1611, lr_0 = 5.4336e-04
Loss = 1.4617e-02, PNorm = 132.8802, GNorm = 0.4408, lr_0 = 5.4299e-04
Loss = 1.2029e-02, PNorm = 132.9124, GNorm = 0.4435, lr_0 = 5.4262e-04
Loss = 1.5494e-02, PNorm = 132.9414, GNorm = 0.4197, lr_0 = 5.4225e-04
Loss = 1.5850e-02, PNorm = 132.9766, GNorm = 0.3883, lr_0 = 5.4187e-04
Loss = 1.4680e-02, PNorm = 133.0157, GNorm = 0.1221, lr_0 = 5.4150e-04
Loss = 1.3489e-02, PNorm = 133.0491, GNorm = 0.3580, lr_0 = 5.4113e-04
Loss = 1.5339e-02, PNorm = 133.0759, GNorm = 0.5479, lr_0 = 5.4076e-04
Loss = 1.6042e-02, PNorm = 133.1099, GNorm = 0.5214, lr_0 = 5.4039e-04
Loss = 1.4398e-02, PNorm = 133.1448, GNorm = 0.1592, lr_0 = 5.4002e-04
Loss = 1.4513e-02, PNorm = 133.1775, GNorm = 0.2823, lr_0 = 5.3965e-04
Loss = 1.5065e-02, PNorm = 133.2065, GNorm = 0.3595, lr_0 = 5.3928e-04
Loss = 1.2331e-02, PNorm = 133.2395, GNorm = 0.1657, lr_0 = 5.3891e-04
Loss = 1.2062e-02, PNorm = 133.2667, GNorm = 0.2901, lr_0 = 5.3854e-04
Loss = 1.3587e-02, PNorm = 133.2981, GNorm = 0.4835, lr_0 = 5.3817e-04
Loss = 1.7196e-02, PNorm = 133.3320, GNorm = 0.8346, lr_0 = 5.3781e-04
Loss = 1.4647e-02, PNorm = 133.3695, GNorm = 0.2680, lr_0 = 5.3744e-04
Loss = 1.5563e-02, PNorm = 133.4041, GNorm = 0.1258, lr_0 = 5.3707e-04
Loss = 1.7230e-02, PNorm = 133.4367, GNorm = 0.6251, lr_0 = 5.3670e-04
Loss = 1.6045e-02, PNorm = 133.4713, GNorm = 0.2649, lr_0 = 5.3633e-04
Loss = 1.5644e-02, PNorm = 133.5052, GNorm = 0.2233, lr_0 = 5.3597e-04
Loss = 1.4931e-02, PNorm = 133.5402, GNorm = 0.3666, lr_0 = 5.3560e-04
Loss = 1.7510e-02, PNorm = 133.5675, GNorm = 0.6442, lr_0 = 5.3523e-04
Loss = 1.3825e-02, PNorm = 133.5993, GNorm = 0.3920, lr_0 = 5.3486e-04
Loss = 1.4837e-02, PNorm = 133.6283, GNorm = 0.2043, lr_0 = 5.3450e-04
Loss = 1.5841e-02, PNorm = 133.6728, GNorm = 0.6235, lr_0 = 5.3413e-04
Loss = 1.4862e-02, PNorm = 133.7115, GNorm = 0.4273, lr_0 = 5.3377e-04
Loss = 1.8951e-02, PNorm = 133.7500, GNorm = 0.4588, lr_0 = 5.3340e-04
Loss = 1.5370e-02, PNorm = 133.7881, GNorm = 0.3626, lr_0 = 5.3304e-04
Loss = 1.2575e-02, PNorm = 133.8301, GNorm = 0.5952, lr_0 = 5.3267e-04
Loss = 1.3868e-02, PNorm = 133.8553, GNorm = 0.7539, lr_0 = 5.3231e-04
Loss = 1.4033e-02, PNorm = 133.8878, GNorm = 0.3957, lr_0 = 5.3194e-04
Loss = 1.3990e-02, PNorm = 133.9179, GNorm = 0.4905, lr_0 = 5.3158e-04
Loss = 1.5166e-02, PNorm = 133.9579, GNorm = 0.3392, lr_0 = 5.3121e-04
Loss = 1.5102e-02, PNorm = 133.9925, GNorm = 0.4166, lr_0 = 5.3085e-04
Loss = 1.4459e-02, PNorm = 134.0318, GNorm = 0.7530, lr_0 = 5.3048e-04
Loss = 1.7249e-02, PNorm = 134.0665, GNorm = 0.5629, lr_0 = 5.3012e-04
Loss = 1.6938e-02, PNorm = 134.1096, GNorm = 0.7281, lr_0 = 5.2976e-04
Loss = 1.3341e-02, PNorm = 134.1472, GNorm = 0.2211, lr_0 = 5.2939e-04
Loss = 1.4922e-02, PNorm = 134.1805, GNorm = 0.6557, lr_0 = 5.2903e-04
Loss = 1.6448e-02, PNorm = 134.2102, GNorm = 0.2380, lr_0 = 5.2867e-04
Loss = 1.5009e-02, PNorm = 134.2449, GNorm = 0.2301, lr_0 = 5.2831e-04
Loss = 1.3848e-02, PNorm = 134.2878, GNorm = 0.2276, lr_0 = 5.2795e-04
Loss = 1.4313e-02, PNorm = 134.3325, GNorm = 0.3697, lr_0 = 5.2758e-04
Loss = 1.7077e-02, PNorm = 134.3644, GNorm = 0.3375, lr_0 = 5.2722e-04
Loss = 1.4110e-02, PNorm = 134.3955, GNorm = 0.3028, lr_0 = 5.2686e-04
Loss = 1.4620e-02, PNorm = 134.4277, GNorm = 0.4176, lr_0 = 5.2650e-04
Loss = 1.5338e-02, PNorm = 134.4665, GNorm = 0.2135, lr_0 = 5.2614e-04
Loss = 1.4425e-02, PNorm = 134.5044, GNorm = 0.2190, lr_0 = 5.2578e-04
Loss = 1.3807e-02, PNorm = 134.5368, GNorm = 0.6480, lr_0 = 5.2542e-04
Loss = 1.7073e-02, PNorm = 134.5732, GNorm = 0.5367, lr_0 = 5.2506e-04
Loss = 1.4962e-02, PNorm = 134.6118, GNorm = 0.6531, lr_0 = 5.2470e-04
Loss = 1.3471e-02, PNorm = 134.6462, GNorm = 0.1592, lr_0 = 5.2434e-04
Loss = 1.4791e-02, PNorm = 134.6799, GNorm = 0.1746, lr_0 = 5.2398e-04
Loss = 1.3008e-02, PNorm = 134.7180, GNorm = 0.3082, lr_0 = 5.2362e-04
Loss = 1.5935e-02, PNorm = 134.7537, GNorm = 0.6754, lr_0 = 5.2326e-04
Loss = 1.5115e-02, PNorm = 134.7870, GNorm = 0.1758, lr_0 = 5.2290e-04
Loss = 1.4640e-02, PNorm = 134.8192, GNorm = 0.6120, lr_0 = 5.2255e-04
Loss = 1.6193e-02, PNorm = 134.8544, GNorm = 0.3798, lr_0 = 5.2219e-04
Loss = 1.6151e-02, PNorm = 134.8959, GNorm = 0.5709, lr_0 = 5.2183e-04
Loss = 1.6443e-02, PNorm = 134.9386, GNorm = 0.2457, lr_0 = 5.2147e-04
Loss = 1.5939e-02, PNorm = 134.9822, GNorm = 0.1944, lr_0 = 5.2112e-04
Loss = 1.4353e-02, PNorm = 135.0214, GNorm = 0.2315, lr_0 = 5.2076e-04
Loss = 1.3534e-02, PNorm = 135.0556, GNorm = 0.2152, lr_0 = 5.2040e-04
Loss = 1.4722e-02, PNorm = 135.0894, GNorm = 0.3605, lr_0 = 5.2005e-04
Loss = 1.6223e-02, PNorm = 135.1234, GNorm = 0.1994, lr_0 = 5.1969e-04
Loss = 1.5443e-02, PNorm = 135.1597, GNorm = 0.3266, lr_0 = 5.1933e-04
Loss = 1.8374e-02, PNorm = 135.1952, GNorm = 0.2237, lr_0 = 5.1898e-04
Loss = 1.4857e-02, PNorm = 135.2336, GNorm = 0.5243, lr_0 = 5.1862e-04
Loss = 1.5012e-02, PNorm = 135.2685, GNorm = 0.7055, lr_0 = 5.1827e-04
Loss = 1.6507e-02, PNorm = 135.3079, GNorm = 0.5787, lr_0 = 5.1791e-04
Validation mae = 0.485852
Epoch 10
Loss = 1.5660e-02, PNorm = 135.3407, GNorm = 0.2528, lr_0 = 5.1756e-04
Loss = 1.2253e-02, PNorm = 135.3644, GNorm = 0.3216, lr_0 = 5.1720e-04
Loss = 1.7018e-02, PNorm = 135.3884, GNorm = 0.4693, lr_0 = 5.1685e-04
Loss = 1.0633e-02, PNorm = 135.4161, GNorm = 0.1183, lr_0 = 5.1649e-04
Loss = 1.3488e-02, PNorm = 135.4383, GNorm = 0.2972, lr_0 = 5.1614e-04
Loss = 1.4970e-02, PNorm = 135.4633, GNorm = 0.2200, lr_0 = 5.1579e-04
Loss = 1.3097e-02, PNorm = 135.4929, GNorm = 0.5087, lr_0 = 5.1543e-04
Loss = 1.3996e-02, PNorm = 135.5169, GNorm = 0.4310, lr_0 = 5.1508e-04
Loss = 1.2460e-02, PNorm = 135.5415, GNorm = 0.3841, lr_0 = 5.1473e-04
Loss = 1.2605e-02, PNorm = 135.5686, GNorm = 0.5074, lr_0 = 5.1437e-04
Loss = 1.3353e-02, PNorm = 135.5931, GNorm = 0.3919, lr_0 = 5.1402e-04
Loss = 1.3536e-02, PNorm = 135.6176, GNorm = 0.4945, lr_0 = 5.1367e-04
Loss = 1.3296e-02, PNorm = 135.6418, GNorm = 0.4671, lr_0 = 5.1332e-04
Loss = 1.5105e-02, PNorm = 135.6685, GNorm = 0.2931, lr_0 = 5.1297e-04
Loss = 1.1761e-02, PNorm = 135.6953, GNorm = 0.2133, lr_0 = 5.1262e-04
Loss = 1.1553e-02, PNorm = 135.7232, GNorm = 0.3197, lr_0 = 5.1226e-04
Loss = 1.1526e-02, PNorm = 135.7476, GNorm = 0.1159, lr_0 = 5.1191e-04
Loss = 1.1432e-02, PNorm = 135.7727, GNorm = 0.3200, lr_0 = 5.1156e-04
Loss = 1.1854e-02, PNorm = 135.7967, GNorm = 0.2736, lr_0 = 5.1121e-04
Loss = 1.3404e-02, PNorm = 135.8202, GNorm = 0.3204, lr_0 = 5.1086e-04
Loss = 1.3593e-02, PNorm = 135.8462, GNorm = 0.2613, lr_0 = 5.1051e-04
Loss = 1.3306e-02, PNorm = 135.8719, GNorm = 0.8860, lr_0 = 5.1016e-04
Loss = 1.4124e-02, PNorm = 135.9002, GNorm = 0.1168, lr_0 = 5.0981e-04
Loss = 1.2882e-02, PNorm = 135.9243, GNorm = 0.2514, lr_0 = 5.0946e-04
Loss = 1.1136e-02, PNorm = 135.9486, GNorm = 0.4103, lr_0 = 5.0911e-04
Loss = 1.3540e-02, PNorm = 135.9757, GNorm = 0.2110, lr_0 = 5.0877e-04
Loss = 1.2470e-02, PNorm = 136.0016, GNorm = 0.1509, lr_0 = 5.0842e-04
Loss = 1.3173e-02, PNorm = 136.0346, GNorm = 0.1646, lr_0 = 5.0807e-04
Loss = 1.2955e-02, PNorm = 136.0608, GNorm = 0.5258, lr_0 = 5.0772e-04
Loss = 1.1538e-02, PNorm = 136.0827, GNorm = 0.6888, lr_0 = 5.0737e-04
Loss = 1.1696e-02, PNorm = 136.1075, GNorm = 0.4448, lr_0 = 5.0703e-04
Loss = 1.3168e-02, PNorm = 136.1372, GNorm = 0.1721, lr_0 = 5.0668e-04
Loss = 1.2294e-02, PNorm = 136.1650, GNorm = 0.2914, lr_0 = 5.0633e-04
Loss = 1.4114e-02, PNorm = 136.1956, GNorm = 0.3751, lr_0 = 5.0598e-04
Loss = 1.2019e-02, PNorm = 136.2222, GNorm = 0.3631, lr_0 = 5.0564e-04
Loss = 1.3017e-02, PNorm = 136.2479, GNorm = 0.2710, lr_0 = 5.0529e-04
Loss = 1.3876e-02, PNorm = 136.2793, GNorm = 0.2554, lr_0 = 5.0494e-04
Loss = 1.2206e-02, PNorm = 136.3083, GNorm = 0.1951, lr_0 = 5.0460e-04
Loss = 1.1930e-02, PNorm = 136.3358, GNorm = 0.2115, lr_0 = 5.0425e-04
Loss = 1.2487e-02, PNorm = 136.3633, GNorm = 0.2080, lr_0 = 5.0391e-04
Loss = 1.2866e-02, PNorm = 136.3887, GNorm = 0.2938, lr_0 = 5.0356e-04
Loss = 1.0606e-02, PNorm = 136.4205, GNorm = 0.3483, lr_0 = 5.0322e-04
Loss = 1.1175e-02, PNorm = 136.4489, GNorm = 0.3747, lr_0 = 5.0287e-04
Loss = 1.2395e-02, PNorm = 136.4708, GNorm = 0.2385, lr_0 = 5.0253e-04
Loss = 1.1100e-02, PNorm = 136.4962, GNorm = 0.1818, lr_0 = 5.0218e-04
Loss = 1.3695e-02, PNorm = 136.5307, GNorm = 0.3108, lr_0 = 5.0184e-04
Loss = 1.2977e-02, PNorm = 136.5576, GNorm = 0.1440, lr_0 = 5.0150e-04
Loss = 1.3145e-02, PNorm = 136.5794, GNorm = 0.2897, lr_0 = 5.0115e-04
Loss = 1.2038e-02, PNorm = 136.5991, GNorm = 0.4285, lr_0 = 5.0081e-04
Loss = 1.2136e-02, PNorm = 136.6279, GNorm = 0.3467, lr_0 = 5.0047e-04
Loss = 1.1405e-02, PNorm = 136.6607, GNorm = 0.2498, lr_0 = 5.0012e-04
Loss = 1.5959e-02, PNorm = 136.6928, GNorm = 0.6110, lr_0 = 4.9978e-04
Loss = 1.1661e-02, PNorm = 136.7234, GNorm = 0.1189, lr_0 = 4.9944e-04
Loss = 1.1873e-02, PNorm = 136.7496, GNorm = 0.2403, lr_0 = 4.9910e-04
Loss = 1.2646e-02, PNorm = 136.7835, GNorm = 0.1397, lr_0 = 4.9875e-04
Loss = 1.0325e-02, PNorm = 136.8112, GNorm = 0.2005, lr_0 = 4.9841e-04
Loss = 1.6330e-02, PNorm = 136.8358, GNorm = 0.3194, lr_0 = 4.9807e-04
Loss = 1.2325e-02, PNorm = 136.8626, GNorm = 0.6327, lr_0 = 4.9773e-04
Loss = 1.4133e-02, PNorm = 136.8907, GNorm = 0.4181, lr_0 = 4.9739e-04
Loss = 1.0542e-02, PNorm = 136.9160, GNorm = 0.4379, lr_0 = 4.9705e-04
Loss = 1.2909e-02, PNorm = 136.9424, GNorm = 0.2932, lr_0 = 4.9671e-04
Loss = 1.0153e-02, PNorm = 136.9680, GNorm = 0.1425, lr_0 = 4.9637e-04
Loss = 1.1561e-02, PNorm = 136.9934, GNorm = 0.2519, lr_0 = 4.9603e-04
Loss = 1.3612e-02, PNorm = 137.0242, GNorm = 0.2149, lr_0 = 4.9569e-04
Loss = 1.2835e-02, PNorm = 137.0529, GNorm = 0.4942, lr_0 = 4.9535e-04
Loss = 1.0538e-02, PNorm = 137.0760, GNorm = 0.2651, lr_0 = 4.9501e-04
Loss = 1.5254e-02, PNorm = 137.1026, GNorm = 0.4216, lr_0 = 4.9467e-04
Loss = 1.5186e-02, PNorm = 137.1310, GNorm = 0.7538, lr_0 = 4.9433e-04
Loss = 1.2071e-02, PNorm = 137.1606, GNorm = 0.1906, lr_0 = 4.9399e-04
Loss = 1.1589e-02, PNorm = 137.1903, GNorm = 0.2449, lr_0 = 4.9365e-04
Loss = 1.4034e-02, PNorm = 137.2226, GNorm = 0.1361, lr_0 = 4.9332e-04
Loss = 1.2629e-02, PNorm = 137.2535, GNorm = 0.1198, lr_0 = 4.9298e-04
Loss = 1.1727e-02, PNorm = 137.2810, GNorm = 0.1363, lr_0 = 4.9264e-04
Loss = 1.2738e-02, PNorm = 137.3017, GNorm = 0.2224, lr_0 = 4.9230e-04
Loss = 1.1764e-02, PNorm = 137.3311, GNorm = 0.2818, lr_0 = 4.9197e-04
Loss = 1.0965e-02, PNorm = 137.3585, GNorm = 0.2904, lr_0 = 4.9163e-04
Loss = 1.3555e-02, PNorm = 137.3899, GNorm = 0.2428, lr_0 = 4.9129e-04
Loss = 1.1948e-02, PNorm = 137.4194, GNorm = 0.2412, lr_0 = 4.9095e-04
Loss = 1.4811e-02, PNorm = 137.4535, GNorm = 0.3565, lr_0 = 4.9062e-04
Loss = 1.3459e-02, PNorm = 137.4894, GNorm = 0.4329, lr_0 = 4.9028e-04
Loss = 1.3809e-02, PNorm = 137.5180, GNorm = 0.4553, lr_0 = 4.8995e-04
Loss = 1.3375e-02, PNorm = 137.5517, GNorm = 0.3808, lr_0 = 4.8961e-04
Loss = 1.1399e-02, PNorm = 137.5781, GNorm = 0.1165, lr_0 = 4.8928e-04
Loss = 1.1144e-02, PNorm = 137.6058, GNorm = 0.3437, lr_0 = 4.8894e-04
Loss = 1.5373e-02, PNorm = 137.6362, GNorm = 0.3527, lr_0 = 4.8861e-04
Loss = 1.1607e-02, PNorm = 137.6706, GNorm = 0.3954, lr_0 = 4.8827e-04
Loss = 1.1674e-02, PNorm = 137.6974, GNorm = 0.3857, lr_0 = 4.8794e-04
Loss = 1.3332e-02, PNorm = 137.7293, GNorm = 0.4589, lr_0 = 4.8760e-04
Loss = 1.4914e-02, PNorm = 137.7579, GNorm = 0.1404, lr_0 = 4.8727e-04
Loss = 1.2731e-02, PNorm = 137.7900, GNorm = 0.2604, lr_0 = 4.8693e-04
Loss = 1.3053e-02, PNorm = 137.8228, GNorm = 0.1577, lr_0 = 4.8660e-04
Loss = 1.2967e-02, PNorm = 137.8594, GNorm = 0.1909, lr_0 = 4.8627e-04
Loss = 1.1102e-02, PNorm = 137.8922, GNorm = 0.1448, lr_0 = 4.8593e-04
Loss = 1.2302e-02, PNorm = 137.9184, GNorm = 0.1026, lr_0 = 4.8560e-04
Loss = 1.2114e-02, PNorm = 137.9492, GNorm = 0.2323, lr_0 = 4.8527e-04
Loss = 1.2203e-02, PNorm = 137.9782, GNorm = 0.1940, lr_0 = 4.8494e-04
Loss = 1.5739e-02, PNorm = 138.0071, GNorm = 0.1059, lr_0 = 4.8460e-04
Loss = 1.3284e-02, PNorm = 138.0394, GNorm = 0.2624, lr_0 = 4.8427e-04
Loss = 1.2321e-02, PNorm = 138.0754, GNorm = 0.3055, lr_0 = 4.8394e-04
Loss = 9.6141e-03, PNorm = 138.1090, GNorm = 0.3874, lr_0 = 4.8361e-04
Loss = 1.2477e-02, PNorm = 138.1337, GNorm = 0.2434, lr_0 = 4.8328e-04
Loss = 9.8717e-03, PNorm = 138.1571, GNorm = 0.1491, lr_0 = 4.8295e-04
Loss = 1.2509e-02, PNorm = 138.1825, GNorm = 0.2534, lr_0 = 4.8262e-04
Loss = 1.2931e-02, PNorm = 138.2052, GNorm = 0.1886, lr_0 = 4.8228e-04
Loss = 1.4631e-02, PNorm = 138.2351, GNorm = 0.3014, lr_0 = 4.8195e-04
Loss = 1.2421e-02, PNorm = 138.2666, GNorm = 0.5416, lr_0 = 4.8162e-04
Loss = 1.5045e-02, PNorm = 138.3038, GNorm = 0.3529, lr_0 = 4.8129e-04
Loss = 1.2110e-02, PNorm = 138.3338, GNorm = 0.1319, lr_0 = 4.8096e-04
Loss = 1.1423e-02, PNorm = 138.3614, GNorm = 0.3315, lr_0 = 4.8064e-04
Loss = 1.2058e-02, PNorm = 138.3861, GNorm = 0.1960, lr_0 = 4.8031e-04
Loss = 1.1581e-02, PNorm = 138.4085, GNorm = 0.2663, lr_0 = 4.7998e-04
Loss = 1.1619e-02, PNorm = 138.4361, GNorm = 0.1664, lr_0 = 4.7965e-04
Loss = 1.1907e-02, PNorm = 138.4656, GNorm = 0.5000, lr_0 = 4.7932e-04
Loss = 1.2548e-02, PNorm = 138.4960, GNorm = 0.4303, lr_0 = 4.7899e-04
Loss = 1.1549e-02, PNorm = 138.5227, GNorm = 0.2247, lr_0 = 4.7866e-04
Loss = 9.7779e-03, PNorm = 138.5557, GNorm = 0.1483, lr_0 = 4.7833e-04
Loss = 1.0327e-02, PNorm = 138.5827, GNorm = 0.1893, lr_0 = 4.7801e-04
Loss = 1.1052e-02, PNorm = 138.6060, GNorm = 0.1625, lr_0 = 4.7768e-04
Loss = 1.3604e-02, PNorm = 138.6324, GNorm = 0.1604, lr_0 = 4.7735e-04
Loss = 1.4616e-02, PNorm = 138.6606, GNorm = 0.3658, lr_0 = 4.7703e-04
Validation mae = 0.479298
Epoch 11
Loss = 1.1712e-02, PNorm = 138.6825, GNorm = 0.2052, lr_0 = 4.7670e-04
Loss = 9.6807e-03, PNorm = 138.6966, GNorm = 0.1151, lr_0 = 4.7637e-04
Loss = 1.0278e-02, PNorm = 138.7152, GNorm = 0.3098, lr_0 = 4.7605e-04
Loss = 1.2380e-02, PNorm = 138.7297, GNorm = 0.4529, lr_0 = 4.7572e-04
Loss = 1.0216e-02, PNorm = 138.7533, GNorm = 0.4359, lr_0 = 4.7539e-04
Loss = 1.1086e-02, PNorm = 138.7780, GNorm = 0.2268, lr_0 = 4.7507e-04
Loss = 1.0503e-02, PNorm = 138.7988, GNorm = 0.5508, lr_0 = 4.7474e-04
Loss = 1.2562e-02, PNorm = 138.8177, GNorm = 0.2099, lr_0 = 4.7442e-04
Loss = 1.1450e-02, PNorm = 138.8353, GNorm = 0.1360, lr_0 = 4.7409e-04
Loss = 1.1422e-02, PNorm = 138.8553, GNorm = 0.5627, lr_0 = 4.7377e-04
Loss = 1.0744e-02, PNorm = 138.8772, GNorm = 0.1011, lr_0 = 4.7344e-04
Loss = 1.0743e-02, PNorm = 138.8987, GNorm = 0.2024, lr_0 = 4.7312e-04
Loss = 1.1382e-02, PNorm = 138.9188, GNorm = 0.2695, lr_0 = 4.7279e-04
Loss = 1.1644e-02, PNorm = 138.9437, GNorm = 0.2255, lr_0 = 4.7247e-04
Loss = 1.0576e-02, PNorm = 138.9700, GNorm = 0.2573, lr_0 = 4.7215e-04
Loss = 1.0356e-02, PNorm = 138.9948, GNorm = 0.5132, lr_0 = 4.7182e-04
Loss = 1.0115e-02, PNorm = 139.0165, GNorm = 0.5217, lr_0 = 4.7150e-04
Loss = 9.2966e-03, PNorm = 139.0342, GNorm = 0.6294, lr_0 = 4.7118e-04
Loss = 1.2173e-02, PNorm = 139.0538, GNorm = 0.4121, lr_0 = 4.7085e-04
Loss = 9.8635e-03, PNorm = 139.0737, GNorm = 0.1732, lr_0 = 4.7053e-04
Loss = 1.0365e-02, PNorm = 139.0926, GNorm = 0.1355, lr_0 = 4.7021e-04
Loss = 9.5078e-03, PNorm = 139.1118, GNorm = 0.1740, lr_0 = 4.6989e-04
Loss = 1.0837e-02, PNorm = 139.1343, GNorm = 0.4263, lr_0 = 4.6957e-04
Loss = 8.3502e-03, PNorm = 139.1581, GNorm = 0.5449, lr_0 = 4.6924e-04
Loss = 9.6123e-03, PNorm = 139.1823, GNorm = 0.2696, lr_0 = 4.6892e-04
Loss = 9.6570e-03, PNorm = 139.2021, GNorm = 0.3260, lr_0 = 4.6860e-04
Loss = 9.9067e-03, PNorm = 139.2239, GNorm = 0.1735, lr_0 = 4.6828e-04
Loss = 8.6178e-03, PNorm = 139.2404, GNorm = 0.2317, lr_0 = 4.6796e-04
Loss = 1.0420e-02, PNorm = 139.2585, GNorm = 0.1056, lr_0 = 4.6764e-04
Loss = 9.9154e-03, PNorm = 139.2800, GNorm = 0.3052, lr_0 = 4.6732e-04
Loss = 1.0432e-02, PNorm = 139.3052, GNorm = 0.2049, lr_0 = 4.6700e-04
Loss = 8.8451e-03, PNorm = 139.3318, GNorm = 0.4332, lr_0 = 4.6668e-04
Loss = 1.0640e-02, PNorm = 139.3560, GNorm = 0.4426, lr_0 = 4.6636e-04
Loss = 1.0632e-02, PNorm = 139.3762, GNorm = 0.3812, lr_0 = 4.6604e-04
Loss = 1.0573e-02, PNorm = 139.3972, GNorm = 0.2087, lr_0 = 4.6572e-04
Loss = 1.0312e-02, PNorm = 139.4236, GNorm = 0.1428, lr_0 = 4.6540e-04
Loss = 9.1436e-03, PNorm = 139.4516, GNorm = 0.1017, lr_0 = 4.6508e-04
Loss = 1.0103e-02, PNorm = 139.4771, GNorm = 0.1743, lr_0 = 4.6476e-04
Loss = 1.0450e-02, PNorm = 139.5002, GNorm = 0.1564, lr_0 = 4.6445e-04
Loss = 1.2016e-02, PNorm = 139.5200, GNorm = 0.1454, lr_0 = 4.6413e-04
Loss = 1.0115e-02, PNorm = 139.5411, GNorm = 0.1418, lr_0 = 4.6381e-04
Loss = 1.1537e-02, PNorm = 139.5632, GNorm = 0.2027, lr_0 = 4.6349e-04
Loss = 9.1932e-03, PNorm = 139.5858, GNorm = 0.3602, lr_0 = 4.6317e-04
Loss = 1.2239e-02, PNorm = 139.6073, GNorm = 0.2947, lr_0 = 4.6286e-04
Loss = 1.2787e-02, PNorm = 139.6314, GNorm = 0.2279, lr_0 = 4.6254e-04
Loss = 9.6390e-03, PNorm = 139.6553, GNorm = 0.4920, lr_0 = 4.6222e-04
Loss = 9.8099e-03, PNorm = 139.6765, GNorm = 0.1482, lr_0 = 4.6191e-04
Loss = 1.2436e-02, PNorm = 139.6981, GNorm = 0.1511, lr_0 = 4.6159e-04
Loss = 1.0010e-02, PNorm = 139.7204, GNorm = 0.1144, lr_0 = 4.6127e-04
Loss = 9.3583e-03, PNorm = 139.7415, GNorm = 0.1555, lr_0 = 4.6096e-04
Loss = 9.8525e-03, PNorm = 139.7640, GNorm = 0.3254, lr_0 = 4.6064e-04
Loss = 8.3885e-03, PNorm = 139.7893, GNorm = 0.1122, lr_0 = 4.6033e-04
Loss = 9.6490e-03, PNorm = 139.8147, GNorm = 0.1233, lr_0 = 4.6001e-04
Loss = 1.1583e-02, PNorm = 139.8358, GNorm = 0.2509, lr_0 = 4.5970e-04
Loss = 1.1157e-02, PNorm = 139.8603, GNorm = 0.7030, lr_0 = 4.5938e-04
Loss = 1.0499e-02, PNorm = 139.8870, GNorm = 0.1755, lr_0 = 4.5907e-04
Loss = 9.0995e-03, PNorm = 139.9114, GNorm = 0.3453, lr_0 = 4.5875e-04
Loss = 9.9618e-03, PNorm = 139.9344, GNorm = 0.3755, lr_0 = 4.5844e-04
Loss = 1.0561e-02, PNorm = 139.9638, GNorm = 0.5646, lr_0 = 4.5812e-04
Loss = 1.1535e-02, PNorm = 139.9905, GNorm = 0.4144, lr_0 = 4.5781e-04
Loss = 1.0048e-02, PNorm = 140.0135, GNorm = 0.1711, lr_0 = 4.5750e-04
Loss = 9.2587e-03, PNorm = 140.0317, GNorm = 0.2191, lr_0 = 4.5718e-04
Loss = 1.0296e-02, PNorm = 140.0501, GNorm = 0.2781, lr_0 = 4.5687e-04
Loss = 1.0021e-02, PNorm = 140.0727, GNorm = 0.5389, lr_0 = 4.5656e-04
Loss = 8.6537e-03, PNorm = 140.0962, GNorm = 0.4392, lr_0 = 4.5624e-04
Loss = 8.5270e-03, PNorm = 140.1198, GNorm = 0.1368, lr_0 = 4.5593e-04
Loss = 1.0425e-02, PNorm = 140.1438, GNorm = 0.1961, lr_0 = 4.5562e-04
Loss = 1.2334e-02, PNorm = 140.1675, GNorm = 0.2009, lr_0 = 4.5531e-04
Loss = 8.8776e-03, PNorm = 140.1879, GNorm = 0.2889, lr_0 = 4.5499e-04
Loss = 8.7943e-03, PNorm = 140.2066, GNorm = 0.1273, lr_0 = 4.5468e-04
Loss = 8.4889e-03, PNorm = 140.2269, GNorm = 0.2333, lr_0 = 4.5437e-04
Loss = 7.8893e-03, PNorm = 140.2468, GNorm = 0.0964, lr_0 = 4.5406e-04
Loss = 9.9263e-03, PNorm = 140.2714, GNorm = 0.1600, lr_0 = 4.5375e-04
Loss = 1.1160e-02, PNorm = 140.2935, GNorm = 0.2179, lr_0 = 4.5344e-04
Loss = 8.1919e-03, PNorm = 140.3155, GNorm = 0.2867, lr_0 = 4.5313e-04
Loss = 1.1862e-02, PNorm = 140.3318, GNorm = 0.1290, lr_0 = 4.5282e-04
Loss = 1.1420e-02, PNorm = 140.3494, GNorm = 0.1479, lr_0 = 4.5251e-04
Loss = 9.8358e-03, PNorm = 140.3680, GNorm = 0.1247, lr_0 = 4.5220e-04
Loss = 1.0196e-02, PNorm = 140.3915, GNorm = 0.2118, lr_0 = 4.5189e-04
Loss = 9.6055e-03, PNorm = 140.4121, GNorm = 0.3052, lr_0 = 4.5158e-04
Loss = 1.1360e-02, PNorm = 140.4413, GNorm = 0.3055, lr_0 = 4.5127e-04
Loss = 8.8029e-03, PNorm = 140.4700, GNorm = 0.1161, lr_0 = 4.5096e-04
Loss = 8.7141e-03, PNorm = 140.4975, GNorm = 0.2799, lr_0 = 4.5065e-04
Loss = 1.1133e-02, PNorm = 140.5228, GNorm = 0.1121, lr_0 = 4.5034e-04
Loss = 9.0310e-03, PNorm = 140.5484, GNorm = 0.2762, lr_0 = 4.5003e-04
Loss = 9.3699e-03, PNorm = 140.5693, GNorm = 0.2473, lr_0 = 4.4972e-04
Loss = 1.0954e-02, PNorm = 140.5883, GNorm = 0.4536, lr_0 = 4.4942e-04
Loss = 8.9150e-03, PNorm = 140.6124, GNorm = 0.1267, lr_0 = 4.4911e-04
Loss = 1.4339e-02, PNorm = 140.6386, GNorm = 0.3152, lr_0 = 4.4880e-04
Loss = 8.4369e-03, PNorm = 140.6610, GNorm = 0.2695, lr_0 = 4.4849e-04
Loss = 1.0721e-02, PNorm = 140.6802, GNorm = 0.3583, lr_0 = 4.4819e-04
Loss = 9.7510e-03, PNorm = 140.7034, GNorm = 0.1716, lr_0 = 4.4788e-04
Loss = 1.2507e-02, PNorm = 140.7274, GNorm = 0.6439, lr_0 = 4.4757e-04
Loss = 9.9361e-03, PNorm = 140.7551, GNorm = 0.1109, lr_0 = 4.4727e-04
Loss = 9.0099e-03, PNorm = 140.7804, GNorm = 0.2676, lr_0 = 4.4696e-04
Loss = 9.5801e-03, PNorm = 140.8064, GNorm = 0.3361, lr_0 = 4.4665e-04
Loss = 9.6959e-03, PNorm = 140.8289, GNorm = 0.1130, lr_0 = 4.4635e-04
Loss = 1.1889e-02, PNorm = 140.8548, GNorm = 0.4632, lr_0 = 4.4604e-04
Loss = 8.2454e-03, PNorm = 140.8797, GNorm = 0.2463, lr_0 = 4.4574e-04
Loss = 8.7345e-03, PNorm = 140.8973, GNorm = 0.3310, lr_0 = 4.4543e-04
Loss = 1.1372e-02, PNorm = 140.9213, GNorm = 0.4545, lr_0 = 4.4513e-04
Loss = 9.5865e-03, PNorm = 140.9495, GNorm = 0.2359, lr_0 = 4.4482e-04
Loss = 1.0299e-02, PNorm = 140.9741, GNorm = 0.4951, lr_0 = 4.4452e-04
Loss = 1.0619e-02, PNorm = 140.9999, GNorm = 0.1427, lr_0 = 4.4421e-04
Loss = 1.3339e-02, PNorm = 141.0246, GNorm = 0.5682, lr_0 = 4.4391e-04
Loss = 8.5508e-03, PNorm = 141.0489, GNorm = 0.0823, lr_0 = 4.4360e-04
Loss = 1.1345e-02, PNorm = 141.0690, GNorm = 0.3030, lr_0 = 4.4330e-04
Loss = 1.3136e-02, PNorm = 141.0939, GNorm = 0.6908, lr_0 = 4.4299e-04
Loss = 1.0324e-02, PNorm = 141.1157, GNorm = 0.2278, lr_0 = 4.4269e-04
Loss = 1.0807e-02, PNorm = 141.1386, GNorm = 0.4622, lr_0 = 4.4239e-04
Loss = 1.2861e-02, PNorm = 141.1692, GNorm = 0.1153, lr_0 = 4.4209e-04
Loss = 9.2981e-03, PNorm = 141.1970, GNorm = 0.1121, lr_0 = 4.4178e-04
Loss = 1.2161e-02, PNorm = 141.2220, GNorm = 0.1318, lr_0 = 4.4148e-04
Loss = 1.0377e-02, PNorm = 141.2448, GNorm = 0.1667, lr_0 = 4.4118e-04
Loss = 1.0889e-02, PNorm = 141.2688, GNorm = 0.2820, lr_0 = 4.4088e-04
Loss = 9.3882e-03, PNorm = 141.2907, GNorm = 0.1285, lr_0 = 4.4057e-04
Loss = 1.1204e-02, PNorm = 141.3165, GNorm = 0.3989, lr_0 = 4.4027e-04
Loss = 1.0612e-02, PNorm = 141.3435, GNorm = 0.3428, lr_0 = 4.3997e-04
Loss = 1.1342e-02, PNorm = 141.3653, GNorm = 0.2698, lr_0 = 4.3967e-04
Loss = 1.0704e-02, PNorm = 141.3899, GNorm = 0.2600, lr_0 = 4.3937e-04
Validation mae = 0.480738
Epoch 12
Loss = 9.2414e-03, PNorm = 141.4070, GNorm = 0.3745, lr_0 = 4.3907e-04
Loss = 9.6996e-03, PNorm = 141.4244, GNorm = 0.1485, lr_0 = 4.3877e-04
Loss = 8.3460e-03, PNorm = 141.4447, GNorm = 0.2478, lr_0 = 4.3846e-04
Loss = 9.9619e-03, PNorm = 141.4616, GNorm = 0.3535, lr_0 = 4.3816e-04
Loss = 8.7864e-03, PNorm = 141.4840, GNorm = 0.1878, lr_0 = 4.3786e-04
Loss = 8.5906e-03, PNorm = 141.5002, GNorm = 0.2625, lr_0 = 4.3756e-04
Loss = 9.0850e-03, PNorm = 141.5191, GNorm = 0.1111, lr_0 = 4.3726e-04
Loss = 8.5965e-03, PNorm = 141.5363, GNorm = 0.3385, lr_0 = 4.3696e-04
Loss = 1.2447e-02, PNorm = 141.5560, GNorm = 0.7620, lr_0 = 4.3667e-04
Loss = 8.5205e-03, PNorm = 141.5717, GNorm = 0.6252, lr_0 = 4.3637e-04
Loss = 8.5794e-03, PNorm = 141.5871, GNorm = 0.5005, lr_0 = 4.3607e-04
Loss = 8.5173e-03, PNorm = 141.6067, GNorm = 0.2579, lr_0 = 4.3577e-04
Loss = 8.0091e-03, PNorm = 141.6257, GNorm = 0.3236, lr_0 = 4.3547e-04
Loss = 7.6231e-03, PNorm = 141.6406, GNorm = 0.4311, lr_0 = 4.3517e-04
Loss = 9.1213e-03, PNorm = 141.6569, GNorm = 0.1513, lr_0 = 4.3487e-04
Loss = 7.1373e-03, PNorm = 141.6760, GNorm = 0.2546, lr_0 = 4.3458e-04
Loss = 7.6698e-03, PNorm = 141.6914, GNorm = 0.1518, lr_0 = 4.3428e-04
Loss = 8.5403e-03, PNorm = 141.7090, GNorm = 0.2221, lr_0 = 4.3398e-04
Loss = 8.8650e-03, PNorm = 141.7248, GNorm = 0.2917, lr_0 = 4.3368e-04
Loss = 7.9272e-03, PNorm = 141.7401, GNorm = 0.3486, lr_0 = 4.3339e-04
Loss = 7.6369e-03, PNorm = 141.7577, GNorm = 0.1854, lr_0 = 4.3309e-04
Loss = 9.7151e-03, PNorm = 141.7746, GNorm = 0.0748, lr_0 = 4.3279e-04
Loss = 8.0720e-03, PNorm = 141.7902, GNorm = 0.2578, lr_0 = 4.3250e-04
Loss = 7.9512e-03, PNorm = 141.8086, GNorm = 0.1952, lr_0 = 4.3220e-04
Loss = 8.8695e-03, PNorm = 141.8251, GNorm = 0.3388, lr_0 = 4.3190e-04
Loss = 6.3721e-03, PNorm = 141.8421, GNorm = 0.2781, lr_0 = 4.3161e-04
Loss = 7.7228e-03, PNorm = 141.8587, GNorm = 0.3256, lr_0 = 4.3131e-04
Loss = 8.6967e-03, PNorm = 141.8729, GNorm = 0.1570, lr_0 = 4.3102e-04
Loss = 8.4767e-03, PNorm = 141.8921, GNorm = 0.4267, lr_0 = 4.3072e-04
Loss = 1.2193e-02, PNorm = 141.9069, GNorm = 0.2557, lr_0 = 4.3043e-04
Loss = 9.1558e-03, PNorm = 141.9273, GNorm = 0.3162, lr_0 = 4.3013e-04
Loss = 7.6633e-03, PNorm = 141.9419, GNorm = 0.2544, lr_0 = 4.2984e-04
Loss = 9.8290e-03, PNorm = 141.9650, GNorm = 0.6382, lr_0 = 4.2954e-04
Loss = 9.9358e-03, PNorm = 141.9804, GNorm = 0.1698, lr_0 = 4.2925e-04
Loss = 6.6684e-03, PNorm = 141.9982, GNorm = 0.1740, lr_0 = 4.2895e-04
Loss = 7.5682e-03, PNorm = 142.0172, GNorm = 0.1521, lr_0 = 4.2866e-04
Loss = 9.4550e-03, PNorm = 142.0333, GNorm = 0.1121, lr_0 = 4.2837e-04
Loss = 7.7507e-03, PNorm = 142.0559, GNorm = 0.2473, lr_0 = 4.2807e-04
Loss = 1.0347e-02, PNorm = 142.0737, GNorm = 0.3324, lr_0 = 4.2778e-04
Loss = 7.9339e-03, PNorm = 142.0954, GNorm = 0.1083, lr_0 = 4.2749e-04
Loss = 7.8187e-03, PNorm = 142.1110, GNorm = 0.1674, lr_0 = 4.2719e-04
Loss = 7.2823e-03, PNorm = 142.1288, GNorm = 0.1976, lr_0 = 4.2690e-04
Loss = 7.9244e-03, PNorm = 142.1482, GNorm = 0.2394, lr_0 = 4.2661e-04
Loss = 8.3887e-03, PNorm = 142.1689, GNorm = 0.3512, lr_0 = 4.2632e-04
Loss = 7.3258e-03, PNorm = 142.1877, GNorm = 0.2298, lr_0 = 4.2602e-04
Loss = 8.6079e-03, PNorm = 142.2130, GNorm = 0.3095, lr_0 = 4.2573e-04
Loss = 8.0472e-03, PNorm = 142.2413, GNorm = 0.2641, lr_0 = 4.2544e-04
Loss = 7.8444e-03, PNorm = 142.2574, GNorm = 0.1743, lr_0 = 4.2515e-04
Loss = 7.4715e-03, PNorm = 142.2736, GNorm = 0.3400, lr_0 = 4.2486e-04
Loss = 8.8687e-03, PNorm = 142.2934, GNorm = 0.1085, lr_0 = 4.2457e-04
Loss = 9.0749e-03, PNorm = 142.3097, GNorm = 0.4050, lr_0 = 4.2428e-04
Loss = 9.8391e-03, PNorm = 142.3327, GNorm = 0.3947, lr_0 = 4.2399e-04
Loss = 8.3429e-03, PNorm = 142.3496, GNorm = 0.1259, lr_0 = 4.2370e-04
Loss = 9.2175e-03, PNorm = 142.3676, GNorm = 0.2000, lr_0 = 4.2340e-04
Loss = 8.9817e-03, PNorm = 142.3878, GNorm = 0.1044, lr_0 = 4.2311e-04
Loss = 9.9530e-03, PNorm = 142.4084, GNorm = 0.5934, lr_0 = 4.2283e-04
Loss = 8.3588e-03, PNorm = 142.4307, GNorm = 0.2794, lr_0 = 4.2254e-04
Loss = 7.6397e-03, PNorm = 142.4494, GNorm = 0.2758, lr_0 = 4.2225e-04
Loss = 7.1020e-03, PNorm = 142.4707, GNorm = 0.0864, lr_0 = 4.2196e-04
Loss = 8.9038e-03, PNorm = 142.4902, GNorm = 0.1914, lr_0 = 4.2167e-04
Loss = 6.6166e-03, PNorm = 142.5082, GNorm = 0.1308, lr_0 = 4.2138e-04
Loss = 8.3559e-03, PNorm = 142.5279, GNorm = 0.3236, lr_0 = 4.2109e-04
Loss = 8.8458e-03, PNorm = 142.5461, GNorm = 0.1301, lr_0 = 4.2080e-04
Loss = 8.3461e-03, PNorm = 142.5595, GNorm = 0.2006, lr_0 = 4.2051e-04
Loss = 8.4175e-03, PNorm = 142.5734, GNorm = 0.3447, lr_0 = 4.2023e-04
Loss = 7.5250e-03, PNorm = 142.5905, GNorm = 0.0861, lr_0 = 4.1994e-04
Loss = 7.3799e-03, PNorm = 142.6089, GNorm = 0.1065, lr_0 = 4.1965e-04
Loss = 8.2550e-03, PNorm = 142.6296, GNorm = 0.2674, lr_0 = 4.1936e-04
Loss = 7.9973e-03, PNorm = 142.6457, GNorm = 0.1288, lr_0 = 4.1907e-04
Loss = 8.1970e-03, PNorm = 142.6625, GNorm = 0.1536, lr_0 = 4.1879e-04
Loss = 8.2501e-03, PNorm = 142.6805, GNorm = 0.1708, lr_0 = 4.1850e-04
Loss = 7.8657e-03, PNorm = 142.7000, GNorm = 0.1501, lr_0 = 4.1821e-04
Loss = 9.5947e-03, PNorm = 142.7208, GNorm = 0.1016, lr_0 = 4.1793e-04
Loss = 7.4134e-03, PNorm = 142.7387, GNorm = 0.2774, lr_0 = 4.1764e-04
Loss = 1.2035e-02, PNorm = 142.7547, GNorm = 0.3442, lr_0 = 4.1736e-04
Loss = 7.7776e-03, PNorm = 142.7777, GNorm = 0.2345, lr_0 = 4.1707e-04
Loss = 9.4686e-03, PNorm = 142.8004, GNorm = 0.1221, lr_0 = 4.1678e-04
Loss = 1.0034e-02, PNorm = 142.8209, GNorm = 0.3672, lr_0 = 4.1650e-04
Loss = 6.9411e-03, PNorm = 142.8437, GNorm = 0.4126, lr_0 = 4.1621e-04
Loss = 8.8746e-03, PNorm = 142.8615, GNorm = 0.4140, lr_0 = 4.1593e-04
Loss = 8.6592e-03, PNorm = 142.8776, GNorm = 0.2613, lr_0 = 4.1564e-04
Loss = 7.7405e-03, PNorm = 142.8943, GNorm = 0.2298, lr_0 = 4.1536e-04
Loss = 7.9737e-03, PNorm = 142.9167, GNorm = 0.2685, lr_0 = 4.1507e-04
Loss = 9.3700e-03, PNorm = 142.9378, GNorm = 0.3493, lr_0 = 4.1479e-04
Loss = 8.0505e-03, PNorm = 142.9599, GNorm = 0.5181, lr_0 = 4.1450e-04
Loss = 8.6722e-03, PNorm = 142.9793, GNorm = 0.1901, lr_0 = 4.1422e-04
Loss = 8.0301e-03, PNorm = 142.9943, GNorm = 0.3614, lr_0 = 4.1394e-04
Loss = 8.7888e-03, PNorm = 143.0077, GNorm = 0.1924, lr_0 = 4.1365e-04
Loss = 8.0451e-03, PNorm = 143.0273, GNorm = 0.2778, lr_0 = 4.1337e-04
Loss = 8.0141e-03, PNorm = 143.0506, GNorm = 0.1341, lr_0 = 4.1309e-04
Loss = 7.4898e-03, PNorm = 143.0740, GNorm = 0.3100, lr_0 = 4.1280e-04
Loss = 9.0904e-03, PNorm = 143.0901, GNorm = 0.2147, lr_0 = 4.1252e-04
Loss = 7.8074e-03, PNorm = 143.1113, GNorm = 0.1488, lr_0 = 4.1224e-04
Loss = 8.4939e-03, PNorm = 143.1287, GNorm = 0.4692, lr_0 = 4.1196e-04
Loss = 7.5467e-03, PNorm = 143.1503, GNorm = 0.1036, lr_0 = 4.1167e-04
Loss = 7.6354e-03, PNorm = 143.1700, GNorm = 0.2508, lr_0 = 4.1139e-04
Loss = 7.9755e-03, PNorm = 143.1878, GNorm = 0.2513, lr_0 = 4.1111e-04
Loss = 8.4695e-03, PNorm = 143.2090, GNorm = 0.1842, lr_0 = 4.1083e-04
Loss = 9.1002e-03, PNorm = 143.2282, GNorm = 0.5384, lr_0 = 4.1055e-04
Loss = 8.8320e-03, PNorm = 143.2511, GNorm = 0.4634, lr_0 = 4.1027e-04
Loss = 8.5965e-03, PNorm = 143.2767, GNorm = 0.2984, lr_0 = 4.0998e-04
Loss = 9.9272e-03, PNorm = 143.2978, GNorm = 0.8594, lr_0 = 4.0970e-04
Loss = 1.0271e-02, PNorm = 143.3206, GNorm = 0.2900, lr_0 = 4.0942e-04
Loss = 8.4091e-03, PNorm = 143.3430, GNorm = 0.2569, lr_0 = 4.0914e-04
Loss = 8.1962e-03, PNorm = 143.3651, GNorm = 0.1842, lr_0 = 4.0886e-04
Loss = 8.4174e-03, PNorm = 143.3825, GNorm = 0.1082, lr_0 = 4.0858e-04
Loss = 9.2704e-03, PNorm = 143.4090, GNorm = 0.2963, lr_0 = 4.0830e-04
Loss = 9.7854e-03, PNorm = 143.4312, GNorm = 0.2639, lr_0 = 4.0802e-04
Loss = 8.9895e-03, PNorm = 143.4512, GNorm = 0.2642, lr_0 = 4.0774e-04
Loss = 8.3962e-03, PNorm = 143.4743, GNorm = 0.3293, lr_0 = 4.0746e-04
Loss = 9.6290e-03, PNorm = 143.4974, GNorm = 0.4781, lr_0 = 4.0718e-04
Loss = 1.1379e-02, PNorm = 143.5150, GNorm = 0.1421, lr_0 = 4.0691e-04
Loss = 8.7919e-03, PNorm = 143.5325, GNorm = 0.3242, lr_0 = 4.0663e-04
Loss = 8.0125e-03, PNorm = 143.5520, GNorm = 0.3809, lr_0 = 4.0635e-04
Loss = 8.9183e-03, PNorm = 143.5748, GNorm = 0.6827, lr_0 = 4.0607e-04
Loss = 7.9755e-03, PNorm = 143.5956, GNorm = 0.0940, lr_0 = 4.0579e-04
Loss = 1.0931e-02, PNorm = 143.6173, GNorm = 0.1860, lr_0 = 4.0551e-04
Loss = 1.1063e-02, PNorm = 143.6366, GNorm = 0.1593, lr_0 = 4.0524e-04
Loss = 9.1380e-03, PNorm = 143.6590, GNorm = 0.4799, lr_0 = 4.0496e-04
Loss = 1.2803e-02, PNorm = 143.6857, GNorm = 0.1761, lr_0 = 4.0468e-04
Validation mae = 0.478878
Epoch 13
Loss = 7.7723e-03, PNorm = 143.7025, GNorm = 0.0936, lr_0 = 4.0440e-04
Loss = 1.2240e-02, PNorm = 143.7166, GNorm = 0.3333, lr_0 = 4.0413e-04
Loss = 7.5590e-03, PNorm = 143.7333, GNorm = 0.2054, lr_0 = 4.0385e-04
Loss = 7.8075e-03, PNorm = 143.7481, GNorm = 0.2893, lr_0 = 4.0357e-04
Loss = 7.1182e-03, PNorm = 143.7618, GNorm = 0.1958, lr_0 = 4.0330e-04
Loss = 7.3913e-03, PNorm = 143.7784, GNorm = 0.4226, lr_0 = 4.0302e-04
Loss = 7.2662e-03, PNorm = 143.7943, GNorm = 0.1085, lr_0 = 4.0274e-04
Loss = 6.4384e-03, PNorm = 143.8081, GNorm = 0.2503, lr_0 = 4.0247e-04
Loss = 6.6352e-03, PNorm = 143.8216, GNorm = 0.2773, lr_0 = 4.0219e-04
Loss = 6.9134e-03, PNorm = 143.8399, GNorm = 0.0693, lr_0 = 4.0192e-04
Loss = 6.8528e-03, PNorm = 143.8532, GNorm = 0.2435, lr_0 = 4.0164e-04
Loss = 7.7609e-03, PNorm = 143.8655, GNorm = 0.0802, lr_0 = 4.0137e-04
Loss = 7.5645e-03, PNorm = 143.8797, GNorm = 0.2538, lr_0 = 4.0109e-04
Loss = 6.8928e-03, PNorm = 143.8933, GNorm = 0.1407, lr_0 = 4.0082e-04
Loss = 6.8885e-03, PNorm = 143.9054, GNorm = 0.1438, lr_0 = 4.0054e-04
Loss = 8.8100e-03, PNorm = 143.9232, GNorm = 0.2796, lr_0 = 4.0027e-04
Loss = 6.3196e-03, PNorm = 143.9349, GNorm = 0.2137, lr_0 = 3.9999e-04
Loss = 8.0341e-03, PNorm = 143.9528, GNorm = 0.2222, lr_0 = 3.9972e-04
Loss = 6.8687e-03, PNorm = 143.9685, GNorm = 0.0753, lr_0 = 3.9945e-04
Loss = 6.0492e-03, PNorm = 143.9818, GNorm = 0.1420, lr_0 = 3.9917e-04
Loss = 7.5664e-03, PNorm = 143.9964, GNorm = 0.3116, lr_0 = 3.9890e-04
Loss = 8.5768e-03, PNorm = 144.0118, GNorm = 0.1177, lr_0 = 3.9863e-04
Loss = 6.9317e-03, PNorm = 144.0269, GNorm = 0.1300, lr_0 = 3.9835e-04
Loss = 8.6140e-03, PNorm = 144.0460, GNorm = 0.1273, lr_0 = 3.9808e-04
Loss = 7.9733e-03, PNorm = 144.0626, GNorm = 0.1654, lr_0 = 3.9781e-04
Loss = 8.9499e-03, PNorm = 144.0787, GNorm = 0.3599, lr_0 = 3.9753e-04
Loss = 9.6105e-03, PNorm = 144.0960, GNorm = 0.2147, lr_0 = 3.9726e-04
Loss = 7.4355e-03, PNorm = 144.1142, GNorm = 0.3653, lr_0 = 3.9699e-04
Loss = 6.4311e-03, PNorm = 144.1303, GNorm = 0.4765, lr_0 = 3.9672e-04
Loss = 1.0969e-02, PNorm = 144.1478, GNorm = 0.1692, lr_0 = 3.9645e-04
Loss = 6.1739e-03, PNorm = 144.1726, GNorm = 0.2991, lr_0 = 3.9617e-04
Loss = 7.2525e-03, PNorm = 144.1912, GNorm = 0.3702, lr_0 = 3.9590e-04
Loss = 6.2628e-03, PNorm = 144.2062, GNorm = 0.2607, lr_0 = 3.9563e-04
Loss = 6.1815e-03, PNorm = 144.2175, GNorm = 0.5534, lr_0 = 3.9536e-04
Loss = 6.3930e-03, PNorm = 144.2329, GNorm = 0.2138, lr_0 = 3.9509e-04
Loss = 6.6941e-03, PNorm = 144.2481, GNorm = 0.1932, lr_0 = 3.9482e-04
Loss = 6.5387e-03, PNorm = 144.2628, GNorm = 0.2763, lr_0 = 3.9455e-04
Loss = 6.7611e-03, PNorm = 144.2732, GNorm = 0.0728, lr_0 = 3.9428e-04
Loss = 7.0026e-03, PNorm = 144.2872, GNorm = 0.3365, lr_0 = 3.9401e-04
Loss = 9.2617e-03, PNorm = 144.3013, GNorm = 0.1804, lr_0 = 3.9374e-04
Loss = 6.3786e-03, PNorm = 144.3158, GNorm = 0.1342, lr_0 = 3.9347e-04
Loss = 6.3519e-03, PNorm = 144.3347, GNorm = 0.3186, lr_0 = 3.9320e-04
Loss = 6.3187e-03, PNorm = 144.3473, GNorm = 0.1056, lr_0 = 3.9293e-04
Loss = 7.4388e-03, PNorm = 144.3616, GNorm = 0.1160, lr_0 = 3.9266e-04
Loss = 6.8571e-03, PNorm = 144.3772, GNorm = 0.0899, lr_0 = 3.9239e-04
Loss = 7.3883e-03, PNorm = 144.3923, GNorm = 0.1098, lr_0 = 3.9212e-04
Loss = 8.1009e-03, PNorm = 144.4049, GNorm = 0.2806, lr_0 = 3.9185e-04
Loss = 7.4765e-03, PNorm = 144.4199, GNorm = 0.3369, lr_0 = 3.9159e-04
Loss = 7.2981e-03, PNorm = 144.4358, GNorm = 0.2216, lr_0 = 3.9132e-04
Loss = 6.9629e-03, PNorm = 144.4566, GNorm = 0.1636, lr_0 = 3.9105e-04
Loss = 6.2311e-03, PNorm = 144.4733, GNorm = 0.3976, lr_0 = 3.9078e-04
Loss = 8.4925e-03, PNorm = 144.4874, GNorm = 0.3241, lr_0 = 3.9051e-04
Loss = 8.1899e-03, PNorm = 144.5021, GNorm = 0.1050, lr_0 = 3.9025e-04
Loss = 5.8395e-03, PNorm = 144.5188, GNorm = 0.3445, lr_0 = 3.8998e-04
Loss = 7.0195e-03, PNorm = 144.5360, GNorm = 0.1005, lr_0 = 3.8971e-04
Loss = 5.8405e-03, PNorm = 144.5533, GNorm = 0.1552, lr_0 = 3.8945e-04
Loss = 5.6371e-03, PNorm = 144.5690, GNorm = 0.3088, lr_0 = 3.8918e-04
Loss = 7.8739e-03, PNorm = 144.5841, GNorm = 0.1454, lr_0 = 3.8891e-04
Loss = 6.0437e-03, PNorm = 144.6005, GNorm = 0.1838, lr_0 = 3.8865e-04
Loss = 6.7008e-03, PNorm = 144.6151, GNorm = 0.2744, lr_0 = 3.8838e-04
Loss = 6.9093e-03, PNorm = 144.6301, GNorm = 0.3028, lr_0 = 3.8811e-04
Loss = 5.5483e-03, PNorm = 144.6482, GNorm = 0.1258, lr_0 = 3.8785e-04
Loss = 8.1437e-03, PNorm = 144.6569, GNorm = 0.1854, lr_0 = 3.8758e-04
Loss = 6.7347e-03, PNorm = 144.6671, GNorm = 0.1514, lr_0 = 3.8732e-04
Loss = 6.2208e-03, PNorm = 144.6840, GNorm = 0.1678, lr_0 = 3.8705e-04
Loss = 6.5324e-03, PNorm = 144.7023, GNorm = 0.1307, lr_0 = 3.8679e-04
Loss = 6.3457e-03, PNorm = 144.7147, GNorm = 0.1022, lr_0 = 3.8652e-04
Loss = 8.3353e-03, PNorm = 144.7344, GNorm = 0.1122, lr_0 = 3.8626e-04
Loss = 6.8167e-03, PNorm = 144.7587, GNorm = 0.0944, lr_0 = 3.8599e-04
Loss = 6.2812e-03, PNorm = 144.7771, GNorm = 0.5238, lr_0 = 3.8573e-04
Loss = 7.7707e-03, PNorm = 144.7947, GNorm = 0.1902, lr_0 = 3.8546e-04
Loss = 6.6288e-03, PNorm = 144.8107, GNorm = 0.1613, lr_0 = 3.8520e-04
Loss = 6.1407e-03, PNorm = 144.8309, GNorm = 0.1317, lr_0 = 3.8493e-04
Loss = 7.0606e-03, PNorm = 144.8443, GNorm = 0.3340, lr_0 = 3.8467e-04
Loss = 6.9625e-03, PNorm = 144.8603, GNorm = 0.1467, lr_0 = 3.8441e-04
Loss = 7.6324e-03, PNorm = 144.8733, GNorm = 0.5993, lr_0 = 3.8414e-04
Loss = 6.2244e-03, PNorm = 144.8883, GNorm = 0.2231, lr_0 = 3.8388e-04
Loss = 6.5141e-03, PNorm = 144.9107, GNorm = 0.2261, lr_0 = 3.8362e-04
Loss = 7.2401e-03, PNorm = 144.9302, GNorm = 0.1887, lr_0 = 3.8336e-04
Loss = 8.1290e-03, PNorm = 144.9465, GNorm = 0.2713, lr_0 = 3.8309e-04
Loss = 8.5781e-03, PNorm = 144.9663, GNorm = 0.2266, lr_0 = 3.8283e-04
Loss = 6.0991e-03, PNorm = 144.9832, GNorm = 0.4239, lr_0 = 3.8257e-04
Loss = 6.5223e-03, PNorm = 144.9932, GNorm = 0.0647, lr_0 = 3.8231e-04
Loss = 6.4520e-03, PNorm = 145.0104, GNorm = 0.1543, lr_0 = 3.8204e-04
Loss = 6.8260e-03, PNorm = 145.0301, GNorm = 0.2065, lr_0 = 3.8178e-04
Loss = 6.1401e-03, PNorm = 145.0478, GNorm = 0.2339, lr_0 = 3.8152e-04
Loss = 7.3131e-03, PNorm = 145.0696, GNorm = 0.3365, lr_0 = 3.8126e-04
Loss = 7.1606e-03, PNorm = 145.0903, GNorm = 0.7269, lr_0 = 3.8100e-04
Loss = 8.0547e-03, PNorm = 145.1061, GNorm = 0.4836, lr_0 = 3.8074e-04
Loss = 6.1170e-03, PNorm = 145.1198, GNorm = 0.0979, lr_0 = 3.8048e-04
Loss = 6.8774e-03, PNorm = 145.1364, GNorm = 0.2253, lr_0 = 3.8022e-04
Loss = 6.8625e-03, PNorm = 145.1480, GNorm = 0.1214, lr_0 = 3.7995e-04
Loss = 7.2555e-03, PNorm = 145.1622, GNorm = 0.3768, lr_0 = 3.7969e-04
Loss = 7.0466e-03, PNorm = 145.1813, GNorm = 0.3529, lr_0 = 3.7943e-04
Loss = 6.0263e-03, PNorm = 145.1983, GNorm = 0.0958, lr_0 = 3.7917e-04
Loss = 6.8764e-03, PNorm = 145.2110, GNorm = 0.1260, lr_0 = 3.7891e-04
Loss = 7.2224e-03, PNorm = 145.2255, GNorm = 0.1283, lr_0 = 3.7866e-04
Loss = 8.3606e-03, PNorm = 145.2466, GNorm = 0.3762, lr_0 = 3.7840e-04
Loss = 9.4057e-03, PNorm = 145.2625, GNorm = 0.2976, lr_0 = 3.7814e-04
Loss = 7.3850e-03, PNorm = 145.2814, GNorm = 0.0937, lr_0 = 3.7788e-04
Loss = 7.6046e-03, PNorm = 145.3002, GNorm = 0.4891, lr_0 = 3.7762e-04
Loss = 6.3848e-03, PNorm = 145.3187, GNorm = 0.1248, lr_0 = 3.7736e-04
Loss = 8.7936e-03, PNorm = 145.3349, GNorm = 0.7770, lr_0 = 3.7710e-04
Loss = 8.1839e-03, PNorm = 145.3518, GNorm = 0.3692, lr_0 = 3.7684e-04
Loss = 7.0743e-03, PNorm = 145.3672, GNorm = 0.2571, lr_0 = 3.7659e-04
Loss = 6.4305e-03, PNorm = 145.3877, GNorm = 0.4712, lr_0 = 3.7633e-04
Loss = 6.9545e-03, PNorm = 145.4061, GNorm = 0.1674, lr_0 = 3.7607e-04
Loss = 6.5935e-03, PNorm = 145.4225, GNorm = 0.4788, lr_0 = 3.7581e-04
Loss = 8.6147e-03, PNorm = 145.4403, GNorm = 0.2690, lr_0 = 3.7555e-04
Loss = 6.2958e-03, PNorm = 145.4595, GNorm = 0.2293, lr_0 = 3.7530e-04
Loss = 7.1667e-03, PNorm = 145.4795, GNorm = 0.1687, lr_0 = 3.7504e-04
Loss = 7.2791e-03, PNorm = 145.4974, GNorm = 0.2825, lr_0 = 3.7478e-04
Loss = 8.2428e-03, PNorm = 145.5165, GNorm = 0.1663, lr_0 = 3.7453e-04
Loss = 7.3455e-03, PNorm = 145.5381, GNorm = 0.2283, lr_0 = 3.7427e-04
Loss = 7.0886e-03, PNorm = 145.5582, GNorm = 0.1594, lr_0 = 3.7401e-04
Loss = 7.2588e-03, PNorm = 145.5773, GNorm = 0.1220, lr_0 = 3.7376e-04
Loss = 1.0498e-02, PNorm = 145.5952, GNorm = 0.1591, lr_0 = 3.7350e-04
Loss = 7.3027e-03, PNorm = 145.6149, GNorm = 0.1094, lr_0 = 3.7325e-04
Loss = 7.6023e-03, PNorm = 145.6338, GNorm = 0.8247, lr_0 = 3.7299e-04
Loss = 1.0382e-02, PNorm = 145.6440, GNorm = 0.1511, lr_0 = 3.7273e-04
Validation mae = 0.478071
Epoch 14
Loss = 6.0243e-03, PNorm = 145.6567, GNorm = 0.1874, lr_0 = 3.7248e-04
Loss = 5.9911e-03, PNorm = 145.6670, GNorm = 0.2181, lr_0 = 3.7222e-04
Loss = 6.9083e-03, PNorm = 145.6799, GNorm = 0.6102, lr_0 = 3.7197e-04
Loss = 6.5781e-03, PNorm = 145.6951, GNorm = 0.2058, lr_0 = 3.7171e-04
Loss = 5.5675e-03, PNorm = 145.7104, GNorm = 0.1313, lr_0 = 3.7146e-04
Loss = 6.6157e-03, PNorm = 145.7242, GNorm = 0.4096, lr_0 = 3.7120e-04
Loss = 6.0270e-03, PNorm = 145.7369, GNorm = 0.3467, lr_0 = 3.7095e-04
Loss = 8.0216e-03, PNorm = 145.7466, GNorm = 0.2424, lr_0 = 3.7070e-04
Loss = 6.8127e-03, PNorm = 145.7619, GNorm = 0.0858, lr_0 = 3.7044e-04
Loss = 6.2127e-03, PNorm = 145.7762, GNorm = 0.4252, lr_0 = 3.7019e-04
Loss = 6.3571e-03, PNorm = 145.7900, GNorm = 0.1424, lr_0 = 3.6993e-04
Loss = 7.4945e-03, PNorm = 145.8041, GNorm = 0.1652, lr_0 = 3.6968e-04
Loss = 6.8794e-03, PNorm = 145.8204, GNorm = 0.1092, lr_0 = 3.6943e-04
Loss = 5.7287e-03, PNorm = 145.8345, GNorm = 0.1501, lr_0 = 3.6917e-04
Loss = 5.9050e-03, PNorm = 145.8470, GNorm = 0.2408, lr_0 = 3.6892e-04
Loss = 6.5715e-03, PNorm = 145.8593, GNorm = 0.2481, lr_0 = 3.6867e-04
Loss = 5.9675e-03, PNorm = 145.8736, GNorm = 0.3286, lr_0 = 3.6842e-04
Loss = 6.5401e-03, PNorm = 145.8885, GNorm = 0.2130, lr_0 = 3.6816e-04
Loss = 5.3887e-03, PNorm = 145.8981, GNorm = 0.3187, lr_0 = 3.6791e-04
Loss = 6.8019e-03, PNorm = 145.9093, GNorm = 0.1592, lr_0 = 3.6766e-04
Loss = 5.5839e-03, PNorm = 145.9228, GNorm = 0.1988, lr_0 = 3.6741e-04
Loss = 6.6606e-03, PNorm = 145.9388, GNorm = 0.1553, lr_0 = 3.6716e-04
Loss = 6.0178e-03, PNorm = 145.9526, GNorm = 0.3564, lr_0 = 3.6690e-04
Loss = 5.2792e-03, PNorm = 145.9665, GNorm = 0.1919, lr_0 = 3.6665e-04
Loss = 6.5475e-03, PNorm = 145.9725, GNorm = 0.1940, lr_0 = 3.6640e-04
Loss = 7.3358e-03, PNorm = 145.9821, GNorm = 0.2571, lr_0 = 3.6615e-04
Loss = 6.3688e-03, PNorm = 145.9964, GNorm = 0.3132, lr_0 = 3.6590e-04
Loss = 8.2121e-03, PNorm = 146.0121, GNorm = 0.0672, lr_0 = 3.6565e-04
Loss = 5.9181e-03, PNorm = 146.0292, GNorm = 0.0831, lr_0 = 3.6540e-04
Loss = 7.3118e-03, PNorm = 146.0452, GNorm = 0.2177, lr_0 = 3.6515e-04
Loss = 5.1135e-03, PNorm = 146.0587, GNorm = 0.1394, lr_0 = 3.6490e-04
Loss = 6.3155e-03, PNorm = 146.0655, GNorm = 0.0992, lr_0 = 3.6465e-04
Loss = 5.2168e-03, PNorm = 146.0774, GNorm = 0.1397, lr_0 = 3.6440e-04
Loss = 5.5667e-03, PNorm = 146.0886, GNorm = 0.4382, lr_0 = 3.6415e-04
Loss = 5.9519e-03, PNorm = 146.1014, GNorm = 0.1375, lr_0 = 3.6390e-04
Loss = 6.0318e-03, PNorm = 146.1120, GNorm = 0.1759, lr_0 = 3.6365e-04
Loss = 5.5467e-03, PNorm = 146.1226, GNorm = 0.1060, lr_0 = 3.6340e-04
Loss = 6.2119e-03, PNorm = 146.1351, GNorm = 0.1886, lr_0 = 3.6315e-04
Loss = 5.7185e-03, PNorm = 146.1502, GNorm = 0.2342, lr_0 = 3.6290e-04
Loss = 5.0865e-03, PNorm = 146.1660, GNorm = 0.2585, lr_0 = 3.6266e-04
Loss = 5.4888e-03, PNorm = 146.1777, GNorm = 0.2477, lr_0 = 3.6241e-04
Loss = 5.2057e-03, PNorm = 146.1904, GNorm = 0.3212, lr_0 = 3.6216e-04
Loss = 6.1377e-03, PNorm = 146.2028, GNorm = 0.4657, lr_0 = 3.6191e-04
Loss = 5.6672e-03, PNorm = 146.2162, GNorm = 0.1746, lr_0 = 3.6166e-04
Loss = 5.6939e-03, PNorm = 146.2267, GNorm = 0.3654, lr_0 = 3.6141e-04
Loss = 1.1193e-02, PNorm = 146.2364, GNorm = 0.5764, lr_0 = 3.6117e-04
Loss = 6.1928e-03, PNorm = 146.2540, GNorm = 0.4757, lr_0 = 3.6092e-04
Loss = 8.2923e-03, PNorm = 146.2695, GNorm = 0.1530, lr_0 = 3.6067e-04
Loss = 7.3433e-03, PNorm = 146.2829, GNorm = 0.4039, lr_0 = 3.6043e-04
Loss = 5.2698e-03, PNorm = 146.2993, GNorm = 0.3078, lr_0 = 3.6018e-04
Loss = 6.7699e-03, PNorm = 146.3118, GNorm = 0.2708, lr_0 = 3.5993e-04
Loss = 5.9418e-03, PNorm = 146.3252, GNorm = 0.2405, lr_0 = 3.5969e-04
Loss = 5.3873e-03, PNorm = 146.3401, GNorm = 0.1052, lr_0 = 3.5944e-04
Loss = 5.6332e-03, PNorm = 146.3503, GNorm = 0.0928, lr_0 = 3.5919e-04
Loss = 6.1814e-03, PNorm = 146.3674, GNorm = 0.1127, lr_0 = 3.5895e-04
Loss = 7.2129e-03, PNorm = 146.3845, GNorm = 0.1945, lr_0 = 3.5870e-04
Loss = 5.2841e-03, PNorm = 146.4003, GNorm = 0.2358, lr_0 = 3.5845e-04
Loss = 6.3203e-03, PNorm = 146.4133, GNorm = 0.2108, lr_0 = 3.5821e-04
Loss = 5.1029e-03, PNorm = 146.4273, GNorm = 0.1787, lr_0 = 3.5796e-04
Loss = 6.8586e-03, PNorm = 146.4394, GNorm = 0.1321, lr_0 = 3.5772e-04
Loss = 6.6076e-03, PNorm = 146.4533, GNorm = 0.0944, lr_0 = 3.5747e-04
Loss = 5.7092e-03, PNorm = 146.4722, GNorm = 0.1381, lr_0 = 3.5723e-04
Loss = 5.1853e-03, PNorm = 146.4857, GNorm = 0.2340, lr_0 = 3.5698e-04
Loss = 6.2244e-03, PNorm = 146.4986, GNorm = 0.1226, lr_0 = 3.5674e-04
Loss = 5.8706e-03, PNorm = 146.5136, GNorm = 0.1761, lr_0 = 3.5650e-04
Loss = 6.6735e-03, PNorm = 146.5296, GNorm = 0.0842, lr_0 = 3.5625e-04
Loss = 5.0833e-03, PNorm = 146.5423, GNorm = 0.2037, lr_0 = 3.5601e-04
Loss = 7.6547e-03, PNorm = 146.5509, GNorm = 0.3280, lr_0 = 3.5576e-04
Loss = 5.6801e-03, PNorm = 146.5666, GNorm = 0.0737, lr_0 = 3.5552e-04
Loss = 6.9926e-03, PNorm = 146.5792, GNorm = 0.6164, lr_0 = 3.5528e-04
Loss = 6.2564e-03, PNorm = 146.5947, GNorm = 0.4110, lr_0 = 3.5503e-04
Loss = 5.4089e-03, PNorm = 146.6089, GNorm = 0.6228, lr_0 = 3.5479e-04
Loss = 5.3617e-03, PNorm = 146.6233, GNorm = 0.1696, lr_0 = 3.5455e-04
Loss = 6.6613e-03, PNorm = 146.6380, GNorm = 0.0952, lr_0 = 3.5430e-04
Loss = 6.1792e-03, PNorm = 146.6508, GNorm = 0.1085, lr_0 = 3.5406e-04
Loss = 7.6989e-03, PNorm = 146.6656, GNorm = 0.3385, lr_0 = 3.5382e-04
Loss = 7.6475e-03, PNorm = 146.6798, GNorm = 0.0952, lr_0 = 3.5358e-04
Loss = 6.1672e-03, PNorm = 146.6966, GNorm = 0.0845, lr_0 = 3.5333e-04
Loss = 5.0648e-03, PNorm = 146.7159, GNorm = 0.1079, lr_0 = 3.5309e-04
Loss = 4.7157e-03, PNorm = 146.7306, GNorm = 0.2310, lr_0 = 3.5285e-04
Loss = 5.5586e-03, PNorm = 146.7417, GNorm = 0.1005, lr_0 = 3.5261e-04
Loss = 5.8161e-03, PNorm = 146.7529, GNorm = 0.3846, lr_0 = 3.5237e-04
Loss = 5.1601e-03, PNorm = 146.7670, GNorm = 0.2344, lr_0 = 3.5212e-04
Loss = 4.5629e-03, PNorm = 146.7836, GNorm = 0.1137, lr_0 = 3.5188e-04
Loss = 6.1986e-03, PNorm = 146.7947, GNorm = 0.2765, lr_0 = 3.5164e-04
Loss = 6.7436e-03, PNorm = 146.8086, GNorm = 0.2863, lr_0 = 3.5140e-04
Loss = 6.1941e-03, PNorm = 146.8212, GNorm = 0.2502, lr_0 = 3.5116e-04
Loss = 6.5773e-03, PNorm = 146.8321, GNorm = 0.2443, lr_0 = 3.5092e-04
Loss = 5.1057e-03, PNorm = 146.8503, GNorm = 0.1771, lr_0 = 3.5068e-04
Loss = 8.0000e-03, PNorm = 146.8701, GNorm = 0.6390, lr_0 = 3.5044e-04
Loss = 5.7941e-03, PNorm = 146.8797, GNorm = 0.1578, lr_0 = 3.5020e-04
Loss = 9.6174e-03, PNorm = 146.8922, GNorm = 0.1160, lr_0 = 3.4996e-04
Loss = 6.9210e-03, PNorm = 146.9054, GNorm = 0.3730, lr_0 = 3.4972e-04
Loss = 5.8595e-03, PNorm = 146.9228, GNorm = 0.3173, lr_0 = 3.4948e-04
Loss = 7.0083e-03, PNorm = 146.9402, GNorm = 0.1157, lr_0 = 3.4924e-04
Loss = 8.9036e-03, PNorm = 146.9573, GNorm = 0.3175, lr_0 = 3.4900e-04
Loss = 5.6275e-03, PNorm = 146.9767, GNorm = 0.6176, lr_0 = 3.4876e-04
Loss = 5.2569e-03, PNorm = 146.9925, GNorm = 0.3215, lr_0 = 3.4852e-04
Loss = 4.7824e-03, PNorm = 147.0051, GNorm = 0.1905, lr_0 = 3.4828e-04
Loss = 6.2715e-03, PNorm = 147.0225, GNorm = 0.1440, lr_0 = 3.4805e-04
Loss = 8.2628e-03, PNorm = 147.0362, GNorm = 0.0810, lr_0 = 3.4781e-04
Loss = 5.1985e-03, PNorm = 147.0466, GNorm = 0.2665, lr_0 = 3.4757e-04
Loss = 6.7256e-03, PNorm = 147.0596, GNorm = 0.2086, lr_0 = 3.4733e-04
Loss = 5.6140e-03, PNorm = 147.0742, GNorm = 0.2596, lr_0 = 3.4709e-04
Loss = 6.8438e-03, PNorm = 147.0932, GNorm = 0.2842, lr_0 = 3.4686e-04
Loss = 6.1195e-03, PNorm = 147.1082, GNorm = 0.2334, lr_0 = 3.4662e-04
Loss = 4.3420e-03, PNorm = 147.1193, GNorm = 0.1164, lr_0 = 3.4638e-04
Loss = 6.3658e-03, PNorm = 147.1311, GNorm = 0.1676, lr_0 = 3.4614e-04
Loss = 6.1477e-03, PNorm = 147.1438, GNorm = 0.0892, lr_0 = 3.4591e-04
Loss = 8.6864e-03, PNorm = 147.1577, GNorm = 0.1428, lr_0 = 3.4567e-04
Loss = 5.8834e-03, PNorm = 147.1711, GNorm = 0.3462, lr_0 = 3.4543e-04
Loss = 6.6934e-03, PNorm = 147.1881, GNorm = 0.2509, lr_0 = 3.4520e-04
Loss = 6.1502e-03, PNorm = 147.2013, GNorm = 0.1440, lr_0 = 3.4496e-04
Loss = 5.0429e-03, PNorm = 147.2129, GNorm = 0.1463, lr_0 = 3.4472e-04
Loss = 5.0176e-03, PNorm = 147.2246, GNorm = 0.2473, lr_0 = 3.4449e-04
Loss = 6.4312e-03, PNorm = 147.2353, GNorm = 0.0909, lr_0 = 3.4425e-04
Loss = 6.4315e-03, PNorm = 147.2519, GNorm = 0.3037, lr_0 = 3.4402e-04
Loss = 6.5925e-03, PNorm = 147.2698, GNorm = 0.1552, lr_0 = 3.4378e-04
Loss = 5.6522e-03, PNorm = 147.2834, GNorm = 0.2261, lr_0 = 3.4354e-04
Loss = 7.2628e-03, PNorm = 147.2991, GNorm = 0.3464, lr_0 = 3.4331e-04
Validation mae = 0.478520
Epoch 15
Loss = 7.2783e-03, PNorm = 147.3096, GNorm = 0.1461, lr_0 = 3.4307e-04
Loss = 5.7733e-03, PNorm = 147.3236, GNorm = 0.2429, lr_0 = 3.4284e-04
Loss = 4.3796e-03, PNorm = 147.3366, GNorm = 0.1990, lr_0 = 3.4260e-04
Loss = 6.2872e-03, PNorm = 147.3496, GNorm = 0.4620, lr_0 = 3.4237e-04
Loss = 5.4016e-03, PNorm = 147.3621, GNorm = 0.1220, lr_0 = 3.4213e-04
Loss = 6.3207e-03, PNorm = 147.3761, GNorm = 0.2385, lr_0 = 3.4190e-04
Loss = 4.3867e-03, PNorm = 147.3859, GNorm = 0.1794, lr_0 = 3.4167e-04
Loss = 5.2182e-03, PNorm = 147.3948, GNorm = 0.1164, lr_0 = 3.4143e-04
Loss = 4.9584e-03, PNorm = 147.4041, GNorm = 0.1133, lr_0 = 3.4120e-04
Loss = 5.2335e-03, PNorm = 147.4170, GNorm = 0.0923, lr_0 = 3.4096e-04
Loss = 5.1192e-03, PNorm = 147.4278, GNorm = 0.1341, lr_0 = 3.4073e-04
Loss = 5.6114e-03, PNorm = 147.4345, GNorm = 0.1452, lr_0 = 3.4050e-04
Loss = 7.7567e-03, PNorm = 147.4391, GNorm = 0.7552, lr_0 = 3.4026e-04
Loss = 4.8300e-03, PNorm = 147.4504, GNorm = 0.4612, lr_0 = 3.4003e-04
Loss = 6.2689e-03, PNorm = 147.4636, GNorm = 0.2629, lr_0 = 3.3980e-04
Loss = 5.7757e-03, PNorm = 147.4789, GNorm = 0.2514, lr_0 = 3.3956e-04
Loss = 6.2217e-03, PNorm = 147.4912, GNorm = 0.1728, lr_0 = 3.3933e-04
Loss = 6.1594e-03, PNorm = 147.4980, GNorm = 0.1115, lr_0 = 3.3910e-04
Loss = 5.1939e-03, PNorm = 147.5068, GNorm = 0.1580, lr_0 = 3.3887e-04
Loss = 5.4046e-03, PNorm = 147.5217, GNorm = 0.3995, lr_0 = 3.3864e-04
Loss = 5.4466e-03, PNorm = 147.5353, GNorm = 0.2068, lr_0 = 3.3840e-04
Loss = 5.4177e-03, PNorm = 147.5473, GNorm = 0.1630, lr_0 = 3.3817e-04
Loss = 4.9977e-03, PNorm = 147.5604, GNorm = 0.3218, lr_0 = 3.3794e-04
Loss = 5.8208e-03, PNorm = 147.5700, GNorm = 0.1295, lr_0 = 3.3771e-04
Loss = 5.0220e-03, PNorm = 147.5806, GNorm = 0.3679, lr_0 = 3.3748e-04
Loss = 4.5891e-03, PNorm = 147.5912, GNorm = 0.1641, lr_0 = 3.3725e-04
Loss = 7.2176e-03, PNorm = 147.6015, GNorm = 0.2274, lr_0 = 3.3701e-04
Loss = 4.9088e-03, PNorm = 147.6130, GNorm = 0.2190, lr_0 = 3.3678e-04
Loss = 6.5180e-03, PNorm = 147.6237, GNorm = 0.0839, lr_0 = 3.3655e-04
Loss = 4.1236e-03, PNorm = 147.6346, GNorm = 0.0864, lr_0 = 3.3632e-04
Loss = 4.8658e-03, PNorm = 147.6442, GNorm = 0.3163, lr_0 = 3.3609e-04
Loss = 4.9173e-03, PNorm = 147.6519, GNorm = 0.2339, lr_0 = 3.3586e-04
Loss = 5.0023e-03, PNorm = 147.6603, GNorm = 0.3115, lr_0 = 3.3563e-04
Loss = 4.8298e-03, PNorm = 147.6718, GNorm = 0.3237, lr_0 = 3.3540e-04
Loss = 4.5005e-03, PNorm = 147.6814, GNorm = 0.1202, lr_0 = 3.3517e-04
Loss = 4.6350e-03, PNorm = 147.6917, GNorm = 0.3181, lr_0 = 3.3494e-04
Loss = 5.2184e-03, PNorm = 147.7006, GNorm = 0.2061, lr_0 = 3.3471e-04
Loss = 5.0597e-03, PNorm = 147.7133, GNorm = 0.5414, lr_0 = 3.3448e-04
Loss = 4.2839e-03, PNorm = 147.7231, GNorm = 0.3149, lr_0 = 3.3425e-04
Loss = 5.8973e-03, PNorm = 147.7360, GNorm = 0.4318, lr_0 = 3.3403e-04
Loss = 5.4348e-03, PNorm = 147.7442, GNorm = 0.1798, lr_0 = 3.3380e-04
Loss = 6.3859e-03, PNorm = 147.7515, GNorm = 0.3371, lr_0 = 3.3357e-04
Loss = 5.4135e-03, PNorm = 147.7641, GNorm = 0.1369, lr_0 = 3.3334e-04
Loss = 5.5266e-03, PNorm = 147.7754, GNorm = 0.3901, lr_0 = 3.3311e-04
Loss = 4.3218e-03, PNorm = 147.7873, GNorm = 0.3951, lr_0 = 3.3288e-04
Loss = 5.4197e-03, PNorm = 147.7981, GNorm = 0.2012, lr_0 = 3.3265e-04
Loss = 5.0831e-03, PNorm = 147.8062, GNorm = 0.3424, lr_0 = 3.3243e-04
Loss = 4.6431e-03, PNorm = 147.8181, GNorm = 0.2517, lr_0 = 3.3220e-04
Loss = 5.2527e-03, PNorm = 147.8310, GNorm = 0.1513, lr_0 = 3.3197e-04
Loss = 5.0780e-03, PNorm = 147.8440, GNorm = 0.1494, lr_0 = 3.3174e-04
Loss = 4.6917e-03, PNorm = 147.8589, GNorm = 0.1376, lr_0 = 3.3152e-04
Loss = 5.0881e-03, PNorm = 147.8754, GNorm = 0.2175, lr_0 = 3.3129e-04
Loss = 8.0942e-03, PNorm = 147.8910, GNorm = 0.0904, lr_0 = 3.3106e-04
Loss = 4.9633e-03, PNorm = 147.9056, GNorm = 0.1817, lr_0 = 3.3084e-04
Loss = 4.8465e-03, PNorm = 147.9168, GNorm = 0.1737, lr_0 = 3.3061e-04
Loss = 4.3509e-03, PNorm = 147.9276, GNorm = 0.2335, lr_0 = 3.3038e-04
Loss = 5.6155e-03, PNorm = 147.9343, GNorm = 0.2486, lr_0 = 3.3016e-04
Loss = 4.4633e-03, PNorm = 147.9436, GNorm = 0.2190, lr_0 = 3.2993e-04
Loss = 4.2792e-03, PNorm = 147.9511, GNorm = 0.1358, lr_0 = 3.2970e-04
Loss = 4.9403e-03, PNorm = 147.9668, GNorm = 0.2414, lr_0 = 3.2948e-04
Loss = 4.2239e-03, PNorm = 147.9814, GNorm = 0.0984, lr_0 = 3.2925e-04
Loss = 5.4120e-03, PNorm = 147.9919, GNorm = 0.1603, lr_0 = 3.2903e-04
Loss = 7.1912e-03, PNorm = 148.0049, GNorm = 0.0749, lr_0 = 3.2880e-04
Loss = 4.5918e-03, PNorm = 148.0169, GNorm = 0.2961, lr_0 = 3.2858e-04
Loss = 5.8138e-03, PNorm = 148.0290, GNorm = 0.2265, lr_0 = 3.2835e-04
Loss = 4.5259e-03, PNorm = 148.0402, GNorm = 0.2374, lr_0 = 3.2813e-04
Loss = 5.0820e-03, PNorm = 148.0540, GNorm = 0.1340, lr_0 = 3.2790e-04
Loss = 5.0375e-03, PNorm = 148.0672, GNorm = 0.3459, lr_0 = 3.2768e-04
Loss = 6.3607e-03, PNorm = 148.0797, GNorm = 0.3253, lr_0 = 3.2745e-04
Loss = 7.2234e-03, PNorm = 148.0919, GNorm = 0.2204, lr_0 = 3.2723e-04
Loss = 3.9729e-03, PNorm = 148.1031, GNorm = 0.3183, lr_0 = 3.2700e-04
Loss = 5.1895e-03, PNorm = 148.1167, GNorm = 0.2267, lr_0 = 3.2678e-04
Loss = 6.6725e-03, PNorm = 148.1301, GNorm = 0.2000, lr_0 = 3.2656e-04
Loss = 5.2816e-03, PNorm = 148.1402, GNorm = 0.3991, lr_0 = 3.2633e-04
Loss = 5.0887e-03, PNorm = 148.1545, GNorm = 0.1213, lr_0 = 3.2611e-04
Loss = 5.8764e-03, PNorm = 148.1662, GNorm = 0.2371, lr_0 = 3.2589e-04
Loss = 4.6470e-03, PNorm = 148.1786, GNorm = 0.2787, lr_0 = 3.2566e-04
Loss = 5.4002e-03, PNorm = 148.1847, GNorm = 0.4069, lr_0 = 3.2544e-04
Loss = 6.1352e-03, PNorm = 148.1951, GNorm = 0.2914, lr_0 = 3.2522e-04
Loss = 6.7052e-03, PNorm = 148.2069, GNorm = 0.1881, lr_0 = 3.2499e-04
Loss = 6.1258e-03, PNorm = 148.2202, GNorm = 0.1865, lr_0 = 3.2477e-04
Loss = 6.6268e-03, PNorm = 148.2308, GNorm = 0.1955, lr_0 = 3.2455e-04
Loss = 6.3373e-03, PNorm = 148.2438, GNorm = 0.2645, lr_0 = 3.2433e-04
Loss = 4.5528e-03, PNorm = 148.2560, GNorm = 0.4101, lr_0 = 3.2410e-04
Loss = 4.7352e-03, PNorm = 148.2695, GNorm = 0.1366, lr_0 = 3.2388e-04
Loss = 4.8700e-03, PNorm = 148.2826, GNorm = 0.1600, lr_0 = 3.2366e-04
Loss = 7.0361e-03, PNorm = 148.2966, GNorm = 0.1240, lr_0 = 3.2344e-04
Loss = 5.2213e-03, PNorm = 148.3065, GNorm = 0.2209, lr_0 = 3.2322e-04
Loss = 7.7320e-03, PNorm = 148.3209, GNorm = 0.3089, lr_0 = 3.2300e-04
Loss = 4.8926e-03, PNorm = 148.3346, GNorm = 0.2631, lr_0 = 3.2277e-04
Loss = 5.4285e-03, PNorm = 148.3468, GNorm = 0.3229, lr_0 = 3.2255e-04
Loss = 4.2778e-03, PNorm = 148.3569, GNorm = 0.0690, lr_0 = 3.2233e-04
Loss = 4.8014e-03, PNorm = 148.3676, GNorm = 0.2366, lr_0 = 3.2211e-04
Loss = 4.5454e-03, PNorm = 148.3801, GNorm = 0.1064, lr_0 = 3.2189e-04
Loss = 8.5360e-03, PNorm = 148.3900, GNorm = 0.2774, lr_0 = 3.2167e-04
Loss = 5.2820e-03, PNorm = 148.4015, GNorm = 0.1604, lr_0 = 3.2145e-04
Loss = 4.5695e-03, PNorm = 148.4108, GNorm = 0.2263, lr_0 = 3.2123e-04
Loss = 5.1582e-03, PNorm = 148.4242, GNorm = 0.2551, lr_0 = 3.2101e-04
Loss = 5.5534e-03, PNorm = 148.4346, GNorm = 0.5346, lr_0 = 3.2079e-04
Loss = 7.3511e-03, PNorm = 148.4534, GNorm = 0.4585, lr_0 = 3.2057e-04
Loss = 4.3876e-03, PNorm = 148.4663, GNorm = 0.3098, lr_0 = 3.2035e-04
Loss = 5.5957e-03, PNorm = 148.4766, GNorm = 0.2229, lr_0 = 3.2013e-04
Loss = 5.4220e-03, PNorm = 148.4834, GNorm = 0.1052, lr_0 = 3.1991e-04
Loss = 7.3228e-03, PNorm = 148.4972, GNorm = 0.1311, lr_0 = 3.1969e-04
Loss = 4.2398e-03, PNorm = 148.5114, GNorm = 0.2485, lr_0 = 3.1947e-04
Loss = 4.2558e-03, PNorm = 148.5237, GNorm = 0.1898, lr_0 = 3.1925e-04
Loss = 8.8062e-03, PNorm = 148.5393, GNorm = 0.2405, lr_0 = 3.1904e-04
Loss = 5.4593e-03, PNorm = 148.5513, GNorm = 0.1342, lr_0 = 3.1882e-04
Loss = 5.0702e-03, PNorm = 148.5663, GNorm = 0.2578, lr_0 = 3.1860e-04
Loss = 5.2720e-03, PNorm = 148.5788, GNorm = 0.1614, lr_0 = 3.1838e-04
Loss = 4.7124e-03, PNorm = 148.5890, GNorm = 0.1356, lr_0 = 3.1816e-04
Loss = 5.2024e-03, PNorm = 148.6025, GNorm = 0.1099, lr_0 = 3.1794e-04
Loss = 4.7868e-03, PNorm = 148.6162, GNorm = 0.1283, lr_0 = 3.1773e-04
Loss = 4.6453e-03, PNorm = 148.6281, GNorm = 0.3082, lr_0 = 3.1751e-04
Loss = 4.9339e-03, PNorm = 148.6363, GNorm = 0.1513, lr_0 = 3.1729e-04
Loss = 4.6862e-03, PNorm = 148.6461, GNorm = 0.1617, lr_0 = 3.1707e-04
Loss = 5.0672e-03, PNorm = 148.6569, GNorm = 0.2323, lr_0 = 3.1686e-04
Loss = 6.2736e-03, PNorm = 148.6701, GNorm = 0.4950, lr_0 = 3.1664e-04
Loss = 5.5495e-03, PNorm = 148.6830, GNorm = 0.2364, lr_0 = 3.1642e-04
Loss = 7.4527e-03, PNorm = 148.6982, GNorm = 0.3432, lr_0 = 3.1621e-04
Validation mae = 0.478057
Epoch 16
Loss = 3.9595e-03, PNorm = 148.7114, GNorm = 0.1141, lr_0 = 3.1599e-04
Loss = 4.2021e-03, PNorm = 148.7187, GNorm = 0.1703, lr_0 = 3.1577e-04
Loss = 6.0988e-03, PNorm = 148.7242, GNorm = 0.0913, lr_0 = 3.1556e-04
Loss = 4.6201e-03, PNorm = 148.7302, GNorm = 0.1884, lr_0 = 3.1534e-04
Loss = 4.3987e-03, PNorm = 148.7381, GNorm = 0.3219, lr_0 = 3.1512e-04
Loss = 6.1459e-03, PNorm = 148.7490, GNorm = 0.2694, lr_0 = 3.1491e-04
Loss = 4.6459e-03, PNorm = 148.7559, GNorm = 0.3032, lr_0 = 3.1469e-04
Loss = 3.7822e-03, PNorm = 148.7613, GNorm = 0.1795, lr_0 = 3.1448e-04
Loss = 5.3566e-03, PNorm = 148.7706, GNorm = 0.0849, lr_0 = 3.1426e-04
Loss = 4.4956e-03, PNorm = 148.7783, GNorm = 0.2021, lr_0 = 3.1405e-04
Loss = 3.7776e-03, PNorm = 148.7854, GNorm = 0.2532, lr_0 = 3.1383e-04
Loss = 3.4610e-03, PNorm = 148.7894, GNorm = 0.2138, lr_0 = 3.1362e-04
Loss = 4.2659e-03, PNorm = 148.7959, GNorm = 0.1153, lr_0 = 3.1340e-04
Loss = 3.3732e-03, PNorm = 148.8036, GNorm = 0.4094, lr_0 = 3.1319e-04
Loss = 6.7802e-03, PNorm = 148.8156, GNorm = 0.0901, lr_0 = 3.1297e-04
Loss = 3.9678e-03, PNorm = 148.8201, GNorm = 0.2949, lr_0 = 3.1276e-04
Loss = 4.0630e-03, PNorm = 148.8250, GNorm = 0.0805, lr_0 = 3.1254e-04
Loss = 5.9909e-03, PNorm = 148.8363, GNorm = 0.4637, lr_0 = 3.1233e-04
Loss = 4.3677e-03, PNorm = 148.8478, GNorm = 0.1655, lr_0 = 3.1212e-04
Loss = 4.5424e-03, PNorm = 148.8586, GNorm = 0.1276, lr_0 = 3.1190e-04
Loss = 4.3297e-03, PNorm = 148.8654, GNorm = 0.2156, lr_0 = 3.1169e-04
Loss = 3.9556e-03, PNorm = 148.8745, GNorm = 0.1125, lr_0 = 3.1147e-04
Loss = 4.3446e-03, PNorm = 148.8807, GNorm = 0.2367, lr_0 = 3.1126e-04
Loss = 4.5231e-03, PNorm = 148.8887, GNorm = 0.1418, lr_0 = 3.1105e-04
Loss = 4.4427e-03, PNorm = 148.8956, GNorm = 0.2298, lr_0 = 3.1083e-04
Loss = 5.8222e-03, PNorm = 148.9050, GNorm = 0.4298, lr_0 = 3.1062e-04
Loss = 4.2563e-03, PNorm = 148.9115, GNorm = 0.1879, lr_0 = 3.1041e-04
Loss = 6.4429e-03, PNorm = 148.9219, GNorm = 0.1274, lr_0 = 3.1020e-04
Loss = 5.7042e-03, PNorm = 148.9274, GNorm = 0.0797, lr_0 = 3.0998e-04
Loss = 3.9205e-03, PNorm = 148.9390, GNorm = 0.1644, lr_0 = 3.0977e-04
Loss = 4.3679e-03, PNorm = 148.9492, GNorm = 0.1535, lr_0 = 3.0956e-04
Loss = 5.7731e-03, PNorm = 148.9628, GNorm = 0.2766, lr_0 = 3.0935e-04
Loss = 7.0922e-03, PNorm = 148.9710, GNorm = 0.2017, lr_0 = 3.0914e-04
Loss = 4.4236e-03, PNorm = 148.9805, GNorm = 0.0800, lr_0 = 3.0892e-04
Loss = 5.2384e-03, PNorm = 148.9900, GNorm = 0.0766, lr_0 = 3.0871e-04
Loss = 5.1164e-03, PNorm = 149.0008, GNorm = 0.1444, lr_0 = 3.0850e-04
Loss = 4.4065e-03, PNorm = 149.0061, GNorm = 0.4270, lr_0 = 3.0829e-04
Loss = 4.1511e-03, PNorm = 149.0141, GNorm = 0.2054, lr_0 = 3.0808e-04
Loss = 3.8113e-03, PNorm = 149.0204, GNorm = 0.1109, lr_0 = 3.0787e-04
Loss = 4.6283e-03, PNorm = 149.0242, GNorm = 0.3330, lr_0 = 3.0766e-04
Loss = 5.2247e-03, PNorm = 149.0350, GNorm = 0.1047, lr_0 = 3.0745e-04
Loss = 5.4365e-03, PNorm = 149.0436, GNorm = 0.1591, lr_0 = 3.0723e-04
Loss = 4.5838e-03, PNorm = 149.0500, GNorm = 0.4847, lr_0 = 3.0702e-04
Loss = 4.1666e-03, PNorm = 149.0614, GNorm = 0.2196, lr_0 = 3.0681e-04
Loss = 4.4651e-03, PNorm = 149.0719, GNorm = 0.2355, lr_0 = 3.0660e-04
Loss = 4.8251e-03, PNorm = 149.0849, GNorm = 0.1330, lr_0 = 3.0639e-04
Loss = 6.6015e-03, PNorm = 149.0942, GNorm = 0.1528, lr_0 = 3.0618e-04
Loss = 5.1694e-03, PNorm = 149.1009, GNorm = 0.4625, lr_0 = 3.0597e-04
Loss = 3.9161e-03, PNorm = 149.1133, GNorm = 0.1786, lr_0 = 3.0576e-04
Loss = 4.3500e-03, PNorm = 149.1253, GNorm = 0.3249, lr_0 = 3.0555e-04
Loss = 3.8892e-03, PNorm = 149.1354, GNorm = 0.1807, lr_0 = 3.0535e-04
Loss = 4.4519e-03, PNorm = 149.1428, GNorm = 0.2285, lr_0 = 3.0514e-04
Loss = 4.5799e-03, PNorm = 149.1506, GNorm = 0.2374, lr_0 = 3.0493e-04
Loss = 3.9265e-03, PNorm = 149.1641, GNorm = 0.3113, lr_0 = 3.0472e-04
Loss = 5.3508e-03, PNorm = 149.1752, GNorm = 0.2802, lr_0 = 3.0451e-04
Loss = 3.7397e-03, PNorm = 149.1841, GNorm = 0.1796, lr_0 = 3.0430e-04
Loss = 4.8232e-03, PNorm = 149.1896, GNorm = 0.1286, lr_0 = 3.0409e-04
Loss = 5.5957e-03, PNorm = 149.1959, GNorm = 0.1147, lr_0 = 3.0388e-04
Loss = 3.1879e-03, PNorm = 149.2058, GNorm = 0.1372, lr_0 = 3.0368e-04
Loss = 5.4031e-03, PNorm = 149.2151, GNorm = 0.1175, lr_0 = 3.0347e-04
Loss = 4.4546e-03, PNorm = 149.2252, GNorm = 0.1082, lr_0 = 3.0326e-04
Loss = 4.1728e-03, PNorm = 149.2340, GNorm = 0.0757, lr_0 = 3.0305e-04
Loss = 4.2413e-03, PNorm = 149.2402, GNorm = 0.1913, lr_0 = 3.0284e-04
Loss = 4.4588e-03, PNorm = 149.2522, GNorm = 0.2139, lr_0 = 3.0264e-04
Loss = 3.4379e-03, PNorm = 149.2589, GNorm = 0.2017, lr_0 = 3.0243e-04
Loss = 3.4843e-03, PNorm = 149.2675, GNorm = 0.0907, lr_0 = 3.0222e-04
Loss = 4.0499e-03, PNorm = 149.2760, GNorm = 0.1457, lr_0 = 3.0202e-04
Loss = 3.8859e-03, PNorm = 149.2828, GNorm = 0.1645, lr_0 = 3.0181e-04
Loss = 4.8828e-03, PNorm = 149.2916, GNorm = 0.2376, lr_0 = 3.0160e-04
Loss = 5.8215e-03, PNorm = 149.2983, GNorm = 0.1301, lr_0 = 3.0140e-04
Loss = 5.5122e-03, PNorm = 149.3078, GNorm = 0.2614, lr_0 = 3.0119e-04
Loss = 4.2600e-03, PNorm = 149.3206, GNorm = 0.3781, lr_0 = 3.0098e-04
Loss = 3.6098e-03, PNorm = 149.3298, GNorm = 0.2129, lr_0 = 3.0078e-04
Loss = 4.1500e-03, PNorm = 149.3395, GNorm = 0.3469, lr_0 = 3.0057e-04
Loss = 4.6085e-03, PNorm = 149.3515, GNorm = 0.2576, lr_0 = 3.0036e-04
Loss = 4.4219e-03, PNorm = 149.3653, GNorm = 0.2198, lr_0 = 3.0016e-04
Loss = 3.3526e-03, PNorm = 149.3761, GNorm = 0.1844, lr_0 = 2.9995e-04
Loss = 5.2455e-03, PNorm = 149.3849, GNorm = 0.0693, lr_0 = 2.9975e-04
Loss = 3.6852e-03, PNorm = 149.3985, GNorm = 0.1170, lr_0 = 2.9954e-04
Loss = 3.6444e-03, PNorm = 149.4052, GNorm = 0.0962, lr_0 = 2.9934e-04
Loss = 4.3336e-03, PNorm = 149.4108, GNorm = 0.0487, lr_0 = 2.9913e-04
Loss = 3.8407e-03, PNorm = 149.4184, GNorm = 0.2601, lr_0 = 2.9893e-04
Loss = 4.6826e-03, PNorm = 149.4279, GNorm = 0.2205, lr_0 = 2.9872e-04
Loss = 5.4133e-03, PNorm = 149.4393, GNorm = 0.1680, lr_0 = 2.9852e-04
Loss = 4.8629e-03, PNorm = 149.4497, GNorm = 0.2280, lr_0 = 2.9831e-04
Loss = 6.4162e-03, PNorm = 149.4607, GNorm = 0.1703, lr_0 = 2.9811e-04
Loss = 4.0448e-03, PNorm = 149.4748, GNorm = 0.2817, lr_0 = 2.9790e-04
Loss = 3.8445e-03, PNorm = 149.4860, GNorm = 0.1784, lr_0 = 2.9770e-04
Loss = 4.0312e-03, PNorm = 149.4970, GNorm = 0.2192, lr_0 = 2.9750e-04
Loss = 3.6010e-03, PNorm = 149.5053, GNorm = 0.1218, lr_0 = 2.9729e-04
Loss = 4.7531e-03, PNorm = 149.5114, GNorm = 0.1560, lr_0 = 2.9709e-04
Loss = 4.7879e-03, PNorm = 149.5202, GNorm = 0.1840, lr_0 = 2.9689e-04
Loss = 3.7357e-03, PNorm = 149.5313, GNorm = 0.1043, lr_0 = 2.9668e-04
Loss = 4.0126e-03, PNorm = 149.5432, GNorm = 0.2511, lr_0 = 2.9648e-04
Loss = 4.4081e-03, PNorm = 149.5519, GNorm = 0.2442, lr_0 = 2.9628e-04
Loss = 4.1722e-03, PNorm = 149.5607, GNorm = 0.1519, lr_0 = 2.9607e-04
Loss = 4.5553e-03, PNorm = 149.5689, GNorm = 0.1300, lr_0 = 2.9587e-04
Loss = 3.7776e-03, PNorm = 149.5773, GNorm = 0.2089, lr_0 = 2.9567e-04
Loss = 4.7848e-03, PNorm = 149.5883, GNorm = 0.1726, lr_0 = 2.9546e-04
Loss = 3.4993e-03, PNorm = 149.6002, GNorm = 0.5442, lr_0 = 2.9526e-04
Loss = 6.0501e-03, PNorm = 149.6062, GNorm = 0.1248, lr_0 = 2.9506e-04
Loss = 3.8542e-03, PNorm = 149.6129, GNorm = 0.1554, lr_0 = 2.9486e-04
Loss = 3.6930e-03, PNorm = 149.6208, GNorm = 0.2092, lr_0 = 2.9466e-04
Loss = 6.4075e-03, PNorm = 149.6300, GNorm = 0.2554, lr_0 = 2.9445e-04
Loss = 3.8999e-03, PNorm = 149.6407, GNorm = 0.2268, lr_0 = 2.9425e-04
Loss = 4.7099e-03, PNorm = 149.6505, GNorm = 0.1129, lr_0 = 2.9405e-04
Loss = 3.3288e-03, PNorm = 149.6594, GNorm = 0.1535, lr_0 = 2.9385e-04
Loss = 3.2152e-03, PNorm = 149.6655, GNorm = 0.1192, lr_0 = 2.9365e-04
Loss = 4.0926e-03, PNorm = 149.6770, GNorm = 0.4123, lr_0 = 2.9345e-04
Loss = 4.2655e-03, PNorm = 149.6895, GNorm = 0.2427, lr_0 = 2.9325e-04
Loss = 6.6811e-03, PNorm = 149.7002, GNorm = 0.1955, lr_0 = 2.9305e-04
Loss = 4.5254e-03, PNorm = 149.7082, GNorm = 0.1450, lr_0 = 2.9284e-04
Loss = 3.7534e-03, PNorm = 149.7171, GNorm = 0.1554, lr_0 = 2.9264e-04
Loss = 4.2089e-03, PNorm = 149.7256, GNorm = 0.1137, lr_0 = 2.9244e-04
Loss = 3.7856e-03, PNorm = 149.7363, GNorm = 0.1844, lr_0 = 2.9224e-04
Loss = 3.7365e-03, PNorm = 149.7451, GNorm = 0.1767, lr_0 = 2.9204e-04
Loss = 5.6929e-03, PNorm = 149.7549, GNorm = 0.4624, lr_0 = 2.9184e-04
Loss = 6.2945e-03, PNorm = 149.7651, GNorm = 0.5590, lr_0 = 2.9164e-04
Loss = 9.8296e-03, PNorm = 149.7774, GNorm = 0.1311, lr_0 = 2.9144e-04
Loss = 6.6497e-03, PNorm = 149.7866, GNorm = 0.3960, lr_0 = 2.9124e-04
Validation mae = 0.476647
Epoch 17
Loss = 3.9926e-03, PNorm = 149.7940, GNorm = 0.0838, lr_0 = 2.9104e-04
Loss = 4.1663e-03, PNorm = 149.8005, GNorm = 0.6964, lr_0 = 2.9084e-04
Loss = 4.1802e-03, PNorm = 149.8063, GNorm = 0.1924, lr_0 = 2.9065e-04
Loss = 3.2965e-03, PNorm = 149.8158, GNorm = 0.1188, lr_0 = 2.9045e-04
Loss = 4.0091e-03, PNorm = 149.8221, GNorm = 0.2272, lr_0 = 2.9025e-04
Loss = 3.4062e-03, PNorm = 149.8288, GNorm = 0.2914, lr_0 = 2.9005e-04
Loss = 3.9761e-03, PNorm = 149.8379, GNorm = 0.1573, lr_0 = 2.8985e-04
Loss = 5.3231e-03, PNorm = 149.8458, GNorm = 0.0644, lr_0 = 2.8965e-04
Loss = 4.3379e-03, PNorm = 149.8562, GNorm = 0.0816, lr_0 = 2.8945e-04
Loss = 3.2465e-03, PNorm = 149.8628, GNorm = 0.0686, lr_0 = 2.8925e-04
Loss = 3.6296e-03, PNorm = 149.8709, GNorm = 0.1951, lr_0 = 2.8906e-04
Loss = 5.1365e-03, PNorm = 149.8772, GNorm = 0.1586, lr_0 = 2.8886e-04
Loss = 4.0593e-03, PNorm = 149.8866, GNorm = 0.1509, lr_0 = 2.8866e-04
Loss = 3.2549e-03, PNorm = 149.8939, GNorm = 0.3142, lr_0 = 2.8846e-04
Loss = 4.7798e-03, PNorm = 149.9013, GNorm = 0.1027, lr_0 = 2.8826e-04
Loss = 4.2535e-03, PNorm = 149.9100, GNorm = 0.2009, lr_0 = 2.8807e-04
Loss = 3.4999e-03, PNorm = 149.9155, GNorm = 0.2122, lr_0 = 2.8787e-04
Loss = 4.7016e-03, PNorm = 149.9261, GNorm = 0.3734, lr_0 = 2.8767e-04
Loss = 3.9476e-03, PNorm = 149.9315, GNorm = 0.2683, lr_0 = 2.8748e-04
Loss = 4.2528e-03, PNorm = 149.9382, GNorm = 0.1641, lr_0 = 2.8728e-04
Loss = 3.5307e-03, PNorm = 149.9450, GNorm = 0.2748, lr_0 = 2.8708e-04
Loss = 4.1694e-03, PNorm = 149.9529, GNorm = 0.0918, lr_0 = 2.8689e-04
Loss = 3.9527e-03, PNorm = 149.9600, GNorm = 0.1009, lr_0 = 2.8669e-04
Loss = 3.3228e-03, PNorm = 149.9672, GNorm = 0.1820, lr_0 = 2.8649e-04
Loss = 4.3525e-03, PNorm = 149.9749, GNorm = 0.3317, lr_0 = 2.8630e-04
Loss = 3.4903e-03, PNorm = 149.9834, GNorm = 0.1913, lr_0 = 2.8610e-04
Loss = 3.9053e-03, PNorm = 149.9931, GNorm = 0.0983, lr_0 = 2.8590e-04
Loss = 3.8831e-03, PNorm = 150.0009, GNorm = 0.1141, lr_0 = 2.8571e-04
Loss = 3.3876e-03, PNorm = 150.0077, GNorm = 0.3267, lr_0 = 2.8551e-04
Loss = 3.2593e-03, PNorm = 150.0156, GNorm = 0.1253, lr_0 = 2.8532e-04
Loss = 3.7915e-03, PNorm = 150.0239, GNorm = 0.3033, lr_0 = 2.8512e-04
Loss = 4.9556e-03, PNorm = 150.0301, GNorm = 0.2219, lr_0 = 2.8493e-04
Loss = 3.3328e-03, PNorm = 150.0367, GNorm = 0.1585, lr_0 = 2.8473e-04
Loss = 2.9430e-03, PNorm = 150.0474, GNorm = 0.1233, lr_0 = 2.8454e-04
Loss = 4.4317e-03, PNorm = 150.0559, GNorm = 0.1719, lr_0 = 2.8434e-04
Loss = 3.9478e-03, PNorm = 150.0647, GNorm = 0.2202, lr_0 = 2.8415e-04
Loss = 5.4349e-03, PNorm = 150.0727, GNorm = 0.1919, lr_0 = 2.8395e-04
Loss = 4.2170e-03, PNorm = 150.0809, GNorm = 0.1192, lr_0 = 2.8376e-04
Loss = 3.1378e-03, PNorm = 150.0851, GNorm = 0.3171, lr_0 = 2.8356e-04
Loss = 3.1396e-03, PNorm = 150.0920, GNorm = 0.1739, lr_0 = 2.8337e-04
Loss = 3.1610e-03, PNorm = 150.1009, GNorm = 0.2027, lr_0 = 2.8317e-04
Loss = 3.1283e-03, PNorm = 150.1097, GNorm = 0.1266, lr_0 = 2.8298e-04
Loss = 3.9893e-03, PNorm = 150.1177, GNorm = 0.1824, lr_0 = 2.8279e-04
Loss = 4.1404e-03, PNorm = 150.1254, GNorm = 0.3448, lr_0 = 2.8259e-04
Loss = 3.0358e-03, PNorm = 150.1325, GNorm = 0.2326, lr_0 = 2.8240e-04
Loss = 3.9125e-03, PNorm = 150.1397, GNorm = 0.3503, lr_0 = 2.8221e-04
Loss = 2.9558e-03, PNorm = 150.1498, GNorm = 0.1199, lr_0 = 2.8201e-04
Loss = 3.0409e-03, PNorm = 150.1602, GNorm = 0.1339, lr_0 = 2.8182e-04
Loss = 3.8713e-03, PNorm = 150.1709, GNorm = 0.1614, lr_0 = 2.8163e-04
Loss = 4.7365e-03, PNorm = 150.1792, GNorm = 0.2256, lr_0 = 2.8143e-04
Loss = 4.2937e-03, PNorm = 150.1850, GNorm = 0.2190, lr_0 = 2.8124e-04
Loss = 6.1132e-03, PNorm = 150.1911, GNorm = 0.2621, lr_0 = 2.8105e-04
Loss = 2.9944e-03, PNorm = 150.1958, GNorm = 0.0849, lr_0 = 2.8085e-04
Loss = 3.1485e-03, PNorm = 150.2020, GNorm = 0.1286, lr_0 = 2.8066e-04
Loss = 3.5423e-03, PNorm = 150.2100, GNorm = 0.2929, lr_0 = 2.8047e-04
Loss = 3.2200e-03, PNorm = 150.2195, GNorm = 0.0984, lr_0 = 2.8028e-04
Loss = 2.7953e-03, PNorm = 150.2267, GNorm = 0.0708, lr_0 = 2.8009e-04
Loss = 3.2720e-03, PNorm = 150.2329, GNorm = 0.1693, lr_0 = 2.7989e-04
Loss = 5.3314e-03, PNorm = 150.2421, GNorm = 0.1554, lr_0 = 2.7970e-04
Loss = 3.4125e-03, PNorm = 150.2513, GNorm = 0.3755, lr_0 = 2.7951e-04
Loss = 3.1236e-03, PNorm = 150.2612, GNorm = 0.1182, lr_0 = 2.7932e-04
Loss = 3.6239e-03, PNorm = 150.2722, GNorm = 0.1001, lr_0 = 2.7913e-04
Loss = 3.4991e-03, PNorm = 150.2811, GNorm = 0.2904, lr_0 = 2.7894e-04
Loss = 4.0849e-03, PNorm = 150.2880, GNorm = 0.1184, lr_0 = 2.7875e-04
Loss = 3.4489e-03, PNorm = 150.2931, GNorm = 0.1582, lr_0 = 2.7855e-04
Loss = 3.4267e-03, PNorm = 150.3021, GNorm = 0.1512, lr_0 = 2.7836e-04
Loss = 4.0476e-03, PNorm = 150.3072, GNorm = 0.0875, lr_0 = 2.7817e-04
Loss = 3.7721e-03, PNorm = 150.3124, GNorm = 0.2960, lr_0 = 2.7798e-04
Loss = 3.3688e-03, PNorm = 150.3197, GNorm = 0.1226, lr_0 = 2.7779e-04
Loss = 4.0325e-03, PNorm = 150.3308, GNorm = 0.2490, lr_0 = 2.7760e-04
Loss = 2.8140e-03, PNorm = 150.3400, GNorm = 0.1388, lr_0 = 2.7741e-04
Loss = 3.4980e-03, PNorm = 150.3502, GNorm = 0.2424, lr_0 = 2.7722e-04
Loss = 4.4852e-03, PNorm = 150.3585, GNorm = 0.2127, lr_0 = 2.7703e-04
Loss = 4.8294e-03, PNorm = 150.3637, GNorm = 0.1030, lr_0 = 2.7684e-04
Loss = 3.2845e-03, PNorm = 150.3730, GNorm = 0.2557, lr_0 = 2.7665e-04
Loss = 4.4272e-03, PNorm = 150.3800, GNorm = 0.0876, lr_0 = 2.7646e-04
Loss = 2.9693e-03, PNorm = 150.3872, GNorm = 0.2521, lr_0 = 2.7627e-04
Loss = 2.7389e-03, PNorm = 150.3957, GNorm = 0.1421, lr_0 = 2.7608e-04
Loss = 3.7260e-03, PNorm = 150.4081, GNorm = 0.2016, lr_0 = 2.7590e-04
Loss = 4.4223e-03, PNorm = 150.4168, GNorm = 0.1828, lr_0 = 2.7571e-04
Loss = 4.8031e-03, PNorm = 150.4253, GNorm = 0.1493, lr_0 = 2.7552e-04
Loss = 5.3162e-03, PNorm = 150.4361, GNorm = 0.2312, lr_0 = 2.7533e-04
Loss = 4.3303e-03, PNorm = 150.4441, GNorm = 0.3451, lr_0 = 2.7514e-04
Loss = 4.2124e-03, PNorm = 150.4522, GNorm = 0.2048, lr_0 = 2.7495e-04
Loss = 4.1055e-03, PNorm = 150.4636, GNorm = 0.2426, lr_0 = 2.7476e-04
Loss = 4.4829e-03, PNorm = 150.4721, GNorm = 0.1186, lr_0 = 2.7457e-04
Loss = 3.3499e-03, PNorm = 150.4801, GNorm = 0.0626, lr_0 = 2.7439e-04
Loss = 4.1162e-03, PNorm = 150.4863, GNorm = 0.0871, lr_0 = 2.7420e-04
Loss = 4.3009e-03, PNorm = 150.4944, GNorm = 0.0871, lr_0 = 2.7401e-04
Loss = 3.2264e-03, PNorm = 150.5035, GNorm = 0.0957, lr_0 = 2.7382e-04
Loss = 4.0316e-03, PNorm = 150.5113, GNorm = 0.2302, lr_0 = 2.7364e-04
Loss = 4.2867e-03, PNorm = 150.5205, GNorm = 0.1249, lr_0 = 2.7345e-04
Loss = 2.9038e-03, PNorm = 150.5260, GNorm = 0.0959, lr_0 = 2.7326e-04
Loss = 4.6236e-03, PNorm = 150.5335, GNorm = 0.3304, lr_0 = 2.7307e-04
Loss = 2.8011e-03, PNorm = 150.5407, GNorm = 0.1909, lr_0 = 2.7289e-04
Loss = 5.8081e-03, PNorm = 150.5494, GNorm = 0.1940, lr_0 = 2.7270e-04
Loss = 3.6833e-03, PNorm = 150.5572, GNorm = 0.0806, lr_0 = 2.7251e-04
Loss = 4.2557e-03, PNorm = 150.5664, GNorm = 0.3187, lr_0 = 2.7233e-04
Loss = 4.8399e-03, PNorm = 150.5740, GNorm = 0.1770, lr_0 = 2.7214e-04
Loss = 4.4152e-03, PNorm = 150.5793, GNorm = 0.1460, lr_0 = 2.7195e-04
Loss = 3.7603e-03, PNorm = 150.5869, GNorm = 0.1444, lr_0 = 2.7177e-04
Loss = 3.0331e-03, PNorm = 150.5959, GNorm = 0.1607, lr_0 = 2.7158e-04
Loss = 3.5141e-03, PNorm = 150.6013, GNorm = 0.1288, lr_0 = 2.7139e-04
Loss = 3.3991e-03, PNorm = 150.6074, GNorm = 0.1401, lr_0 = 2.7121e-04
Loss = 3.0570e-03, PNorm = 150.6129, GNorm = 0.1240, lr_0 = 2.7102e-04
Loss = 2.7835e-03, PNorm = 150.6213, GNorm = 0.2198, lr_0 = 2.7084e-04
Loss = 5.4262e-03, PNorm = 150.6286, GNorm = 0.1075, lr_0 = 2.7065e-04
Loss = 3.0264e-03, PNorm = 150.6363, GNorm = 0.0893, lr_0 = 2.7047e-04
Loss = 5.0236e-03, PNorm = 150.6461, GNorm = 0.0892, lr_0 = 2.7028e-04
Loss = 2.9578e-03, PNorm = 150.6517, GNorm = 0.0783, lr_0 = 2.7010e-04
Loss = 3.8131e-03, PNorm = 150.6593, GNorm = 0.1871, lr_0 = 2.6991e-04
Loss = 5.1216e-03, PNorm = 150.6680, GNorm = 0.1864, lr_0 = 2.6973e-04
Loss = 3.2273e-03, PNorm = 150.6783, GNorm = 0.3762, lr_0 = 2.6954e-04
Loss = 3.0496e-03, PNorm = 150.6873, GNorm = 0.1453, lr_0 = 2.6936e-04
Loss = 6.5769e-03, PNorm = 150.6936, GNorm = 0.3422, lr_0 = 2.6917e-04
Loss = 5.6212e-03, PNorm = 150.7033, GNorm = 0.0894, lr_0 = 2.6899e-04
Loss = 8.7873e-03, PNorm = 150.7141, GNorm = 0.1091, lr_0 = 2.6880e-04
Loss = 6.3734e-03, PNorm = 150.7240, GNorm = 0.0587, lr_0 = 2.6862e-04
Loss = 3.3250e-03, PNorm = 150.7358, GNorm = 0.3744, lr_0 = 2.6844e-04
Loss = 5.2844e-03, PNorm = 150.7478, GNorm = 0.1472, lr_0 = 2.6825e-04
Validation mae = 0.475981
Epoch 18
Loss = 2.8236e-03, PNorm = 150.7571, GNorm = 0.1017, lr_0 = 2.6807e-04
Loss = 3.1797e-03, PNorm = 150.7636, GNorm = 0.2135, lr_0 = 2.6788e-04
Loss = 3.8907e-03, PNorm = 150.7693, GNorm = 0.0583, lr_0 = 2.6770e-04
Loss = 2.8112e-03, PNorm = 150.7719, GNorm = 0.0883, lr_0 = 2.6752e-04
Loss = 2.7569e-03, PNorm = 150.7777, GNorm = 0.1128, lr_0 = 2.6733e-04
Loss = 3.1694e-03, PNorm = 150.7840, GNorm = 0.0993, lr_0 = 2.6715e-04
Loss = 3.8925e-03, PNorm = 150.7914, GNorm = 0.0839, lr_0 = 2.6697e-04
Loss = 4.0470e-03, PNorm = 150.7998, GNorm = 0.1074, lr_0 = 2.6678e-04
Loss = 3.3871e-03, PNorm = 150.8023, GNorm = 0.3917, lr_0 = 2.6660e-04
Loss = 3.1950e-03, PNorm = 150.8098, GNorm = 0.1760, lr_0 = 2.6642e-04
Loss = 2.7733e-03, PNorm = 150.8153, GNorm = 0.2180, lr_0 = 2.6624e-04
Loss = 3.8861e-03, PNorm = 150.8201, GNorm = 0.0854, lr_0 = 2.6605e-04
Loss = 3.1819e-03, PNorm = 150.8260, GNorm = 0.1324, lr_0 = 2.6587e-04
Loss = 4.9970e-03, PNorm = 150.8338, GNorm = 0.2386, lr_0 = 2.6569e-04
Loss = 3.0685e-03, PNorm = 150.8422, GNorm = 0.0794, lr_0 = 2.6551e-04
Loss = 3.4827e-03, PNorm = 150.8452, GNorm = 0.0755, lr_0 = 2.6533e-04
Loss = 3.6001e-03, PNorm = 150.8469, GNorm = 0.1726, lr_0 = 2.6514e-04
Loss = 3.0863e-03, PNorm = 150.8502, GNorm = 0.1516, lr_0 = 2.6496e-04
Loss = 3.6102e-03, PNorm = 150.8565, GNorm = 0.0997, lr_0 = 2.6478e-04
Loss = 3.4617e-03, PNorm = 150.8639, GNorm = 0.0872, lr_0 = 2.6460e-04
Loss = 3.0667e-03, PNorm = 150.8714, GNorm = 0.0865, lr_0 = 2.6442e-04
Loss = 3.1257e-03, PNorm = 150.8739, GNorm = 0.1688, lr_0 = 2.6424e-04
Loss = 2.9133e-03, PNorm = 150.8789, GNorm = 0.1569, lr_0 = 2.6406e-04
Loss = 3.3625e-03, PNorm = 150.8877, GNorm = 0.0962, lr_0 = 2.6388e-04
Loss = 3.5075e-03, PNorm = 150.8963, GNorm = 0.0662, lr_0 = 2.6369e-04
Loss = 2.1415e-03, PNorm = 150.9035, GNorm = 0.0743, lr_0 = 2.6351e-04
Loss = 5.0723e-03, PNorm = 150.9090, GNorm = 0.0533, lr_0 = 2.6333e-04
Loss = 3.9076e-03, PNorm = 150.9165, GNorm = 0.1937, lr_0 = 2.6315e-04
Loss = 2.8423e-03, PNorm = 150.9222, GNorm = 0.1145, lr_0 = 2.6297e-04
Loss = 3.2333e-03, PNorm = 150.9287, GNorm = 0.1231, lr_0 = 2.6279e-04
Loss = 3.6427e-03, PNorm = 150.9350, GNorm = 0.1251, lr_0 = 2.6261e-04
Loss = 2.6188e-03, PNorm = 150.9459, GNorm = 0.1880, lr_0 = 2.6243e-04
Loss = 2.6344e-03, PNorm = 150.9541, GNorm = 0.0835, lr_0 = 2.6225e-04
Loss = 2.9039e-03, PNorm = 150.9626, GNorm = 0.2132, lr_0 = 2.6207e-04
Loss = 3.5924e-03, PNorm = 150.9718, GNorm = 0.0781, lr_0 = 2.6189e-04
Loss = 3.1571e-03, PNorm = 150.9785, GNorm = 0.2043, lr_0 = 2.6171e-04
Loss = 3.0435e-03, PNorm = 150.9826, GNorm = 0.3358, lr_0 = 2.6153e-04
Loss = 3.4678e-03, PNorm = 150.9867, GNorm = 0.1185, lr_0 = 2.6136e-04
Loss = 4.9612e-03, PNorm = 150.9914, GNorm = 0.1236, lr_0 = 2.6118e-04
Loss = 3.0670e-03, PNorm = 150.9992, GNorm = 0.0812, lr_0 = 2.6100e-04
Loss = 2.8358e-03, PNorm = 151.0072, GNorm = 0.1088, lr_0 = 2.6082e-04
Loss = 3.1760e-03, PNorm = 151.0149, GNorm = 0.1552, lr_0 = 2.6064e-04
Loss = 5.6538e-03, PNorm = 151.0209, GNorm = 0.0900, lr_0 = 2.6046e-04
Loss = 2.6893e-03, PNorm = 151.0291, GNorm = 0.2647, lr_0 = 2.6028e-04
Loss = 4.6758e-03, PNorm = 151.0359, GNorm = 0.2312, lr_0 = 2.6011e-04
Loss = 2.9153e-03, PNorm = 151.0412, GNorm = 0.0801, lr_0 = 2.5993e-04
Loss = 2.9358e-03, PNorm = 151.0465, GNorm = 0.1757, lr_0 = 2.5975e-04
Loss = 2.2707e-03, PNorm = 151.0510, GNorm = 0.0559, lr_0 = 2.5957e-04
Loss = 2.6613e-03, PNorm = 151.0571, GNorm = 0.1058, lr_0 = 2.5939e-04
Loss = 4.7149e-03, PNorm = 151.0628, GNorm = 0.2503, lr_0 = 2.5922e-04
Loss = 3.3418e-03, PNorm = 151.0677, GNorm = 0.1600, lr_0 = 2.5904e-04
Loss = 3.2519e-03, PNorm = 151.0728, GNorm = 0.3080, lr_0 = 2.5886e-04
Loss = 3.2242e-03, PNorm = 151.0782, GNorm = 0.2266, lr_0 = 2.5868e-04
Loss = 6.1715e-03, PNorm = 151.0848, GNorm = 0.0595, lr_0 = 2.5851e-04
Loss = 3.1899e-03, PNorm = 151.0902, GNorm = 0.1339, lr_0 = 2.5833e-04
Loss = 2.5801e-03, PNorm = 151.0994, GNorm = 0.0494, lr_0 = 2.5815e-04
Loss = 4.6395e-03, PNorm = 151.1066, GNorm = 0.1316, lr_0 = 2.5797e-04
Loss = 3.1832e-03, PNorm = 151.1114, GNorm = 0.1324, lr_0 = 2.5780e-04
Loss = 2.4779e-03, PNorm = 151.1184, GNorm = 0.1575, lr_0 = 2.5762e-04
Loss = 2.2745e-03, PNorm = 151.1254, GNorm = 0.0775, lr_0 = 2.5745e-04
Loss = 3.4851e-03, PNorm = 151.1322, GNorm = 0.1536, lr_0 = 2.5727e-04
Loss = 3.1614e-03, PNorm = 151.1395, GNorm = 0.0912, lr_0 = 2.5709e-04
Loss = 4.8249e-03, PNorm = 151.1478, GNorm = 0.0845, lr_0 = 2.5692e-04
Loss = 2.8268e-03, PNorm = 151.1507, GNorm = 0.2624, lr_0 = 2.5674e-04
Loss = 2.9971e-03, PNorm = 151.1603, GNorm = 0.1104, lr_0 = 2.5656e-04
Loss = 2.7763e-03, PNorm = 151.1647, GNorm = 0.2445, lr_0 = 2.5639e-04
Loss = 2.7441e-03, PNorm = 151.1712, GNorm = 0.2878, lr_0 = 2.5621e-04
Loss = 3.5739e-03, PNorm = 151.1784, GNorm = 0.0772, lr_0 = 2.5604e-04
Loss = 4.8079e-03, PNorm = 151.1871, GNorm = 0.2471, lr_0 = 2.5586e-04
Loss = 4.5931e-03, PNorm = 151.1952, GNorm = 0.0682, lr_0 = 2.5569e-04
Loss = 2.7854e-03, PNorm = 151.2015, GNorm = 0.0973, lr_0 = 2.5551e-04
Loss = 3.9728e-03, PNorm = 151.2110, GNorm = 0.3371, lr_0 = 2.5534e-04
Loss = 3.2974e-03, PNorm = 151.2152, GNorm = 0.4514, lr_0 = 2.5516e-04
Loss = 3.4442e-03, PNorm = 151.2209, GNorm = 0.2003, lr_0 = 2.5499e-04
Loss = 2.7213e-03, PNorm = 151.2297, GNorm = 0.2167, lr_0 = 2.5481e-04
Loss = 2.5622e-03, PNorm = 151.2362, GNorm = 0.1084, lr_0 = 2.5464e-04
Loss = 3.2602e-03, PNorm = 151.2431, GNorm = 0.1497, lr_0 = 2.5446e-04
Loss = 2.6507e-03, PNorm = 151.2472, GNorm = 0.2379, lr_0 = 2.5429e-04
Loss = 2.4977e-03, PNorm = 151.2524, GNorm = 0.1497, lr_0 = 2.5411e-04
Loss = 3.5170e-03, PNorm = 151.2591, GNorm = 0.1041, lr_0 = 2.5394e-04
Loss = 2.9455e-03, PNorm = 151.2643, GNorm = 0.1610, lr_0 = 2.5377e-04
Loss = 2.6839e-03, PNorm = 151.2738, GNorm = 0.1087, lr_0 = 2.5359e-04
Loss = 2.8958e-03, PNorm = 151.2824, GNorm = 0.0806, lr_0 = 2.5342e-04
Loss = 4.6227e-03, PNorm = 151.2864, GNorm = 0.2302, lr_0 = 2.5325e-04
Loss = 3.0458e-03, PNorm = 151.2922, GNorm = 0.1444, lr_0 = 2.5307e-04
Loss = 3.4265e-03, PNorm = 151.2990, GNorm = 0.1115, lr_0 = 2.5290e-04
Loss = 3.6778e-03, PNorm = 151.3088, GNorm = 0.1825, lr_0 = 2.5273e-04
Loss = 2.3630e-03, PNorm = 151.3170, GNorm = 0.0845, lr_0 = 2.5255e-04
Loss = 3.4045e-03, PNorm = 151.3227, GNorm = 0.0672, lr_0 = 2.5238e-04
Loss = 3.3768e-03, PNorm = 151.3296, GNorm = 0.2271, lr_0 = 2.5221e-04
Loss = 2.3835e-03, PNorm = 151.3368, GNorm = 0.1170, lr_0 = 2.5203e-04
Loss = 3.7628e-03, PNorm = 151.3418, GNorm = 0.1674, lr_0 = 2.5186e-04
Loss = 4.8721e-03, PNorm = 151.3504, GNorm = 0.3064, lr_0 = 2.5169e-04
Loss = 2.9024e-03, PNorm = 151.3557, GNorm = 0.1397, lr_0 = 2.5152e-04
Loss = 3.4548e-03, PNorm = 151.3611, GNorm = 0.0792, lr_0 = 2.5134e-04
Loss = 8.1871e-03, PNorm = 151.3670, GNorm = 1.2376, lr_0 = 2.5117e-04
Loss = 3.0419e-03, PNorm = 151.3737, GNorm = 0.0778, lr_0 = 2.5100e-04
Loss = 3.1446e-03, PNorm = 151.3841, GNorm = 0.1015, lr_0 = 2.5083e-04
Loss = 3.6198e-03, PNorm = 151.3933, GNorm = 0.1681, lr_0 = 2.5066e-04
Loss = 2.6871e-03, PNorm = 151.4015, GNorm = 0.2153, lr_0 = 2.5048e-04
Loss = 3.4885e-03, PNorm = 151.4078, GNorm = 0.1142, lr_0 = 2.5031e-04
Loss = 3.3226e-03, PNorm = 151.4119, GNorm = 0.1371, lr_0 = 2.5014e-04
Loss = 3.6245e-03, PNorm = 151.4185, GNorm = 0.0981, lr_0 = 2.4997e-04
Loss = 4.2397e-03, PNorm = 151.4249, GNorm = 0.1167, lr_0 = 2.4980e-04
Loss = 3.0855e-03, PNorm = 151.4293, GNorm = 0.2356, lr_0 = 2.4963e-04
Loss = 4.0291e-03, PNorm = 151.4363, GNorm = 0.1847, lr_0 = 2.4946e-04
Loss = 3.5572e-03, PNorm = 151.4459, GNorm = 0.3931, lr_0 = 2.4929e-04
Loss = 4.7185e-03, PNorm = 151.4555, GNorm = 0.5178, lr_0 = 2.4911e-04
Loss = 3.2366e-03, PNorm = 151.4607, GNorm = 0.2959, lr_0 = 2.4894e-04
Loss = 2.8890e-03, PNorm = 151.4699, GNorm = 0.0649, lr_0 = 2.4877e-04
Loss = 2.7603e-03, PNorm = 151.4777, GNorm = 0.1550, lr_0 = 2.4860e-04
Loss = 3.0751e-03, PNorm = 151.4845, GNorm = 0.1602, lr_0 = 2.4843e-04
Loss = 3.2338e-03, PNorm = 151.4933, GNorm = 0.3073, lr_0 = 2.4826e-04
Loss = 2.8775e-03, PNorm = 151.5032, GNorm = 0.0720, lr_0 = 2.4809e-04
Loss = 3.1336e-03, PNorm = 151.5136, GNorm = 0.2980, lr_0 = 2.4792e-04
Loss = 4.7044e-03, PNorm = 151.5203, GNorm = 0.1829, lr_0 = 2.4775e-04
Loss = 3.4766e-03, PNorm = 151.5233, GNorm = 0.1731, lr_0 = 2.4758e-04
Loss = 3.6196e-03, PNorm = 151.5289, GNorm = 0.0463, lr_0 = 2.4741e-04
Loss = 5.1580e-03, PNorm = 151.5374, GNorm = 0.5226, lr_0 = 2.4724e-04
Loss = 3.4877e-03, PNorm = 151.5466, GNorm = 0.3065, lr_0 = 2.4707e-04
Validation mae = 0.475758
Epoch 19
Loss = 3.0509e-03, PNorm = 151.5551, GNorm = 0.2616, lr_0 = 2.4690e-04
Loss = 3.2844e-03, PNorm = 151.5589, GNorm = 0.1747, lr_0 = 2.4674e-04
Loss = 4.0636e-03, PNorm = 151.5654, GNorm = 0.0765, lr_0 = 2.4657e-04
Loss = 2.2012e-03, PNorm = 151.5726, GNorm = 0.0835, lr_0 = 2.4640e-04
Loss = 3.4641e-03, PNorm = 151.5796, GNorm = 0.2569, lr_0 = 2.4623e-04
Loss = 2.7614e-03, PNorm = 151.5846, GNorm = 0.0814, lr_0 = 2.4606e-04
Loss = 3.6729e-03, PNorm = 151.5886, GNorm = 0.2580, lr_0 = 2.4589e-04
Loss = 2.2323e-03, PNorm = 151.5903, GNorm = 0.1147, lr_0 = 2.4572e-04
Loss = 2.7474e-03, PNorm = 151.5945, GNorm = 0.1012, lr_0 = 2.4556e-04
Loss = 3.0715e-03, PNorm = 151.6004, GNorm = 0.1333, lr_0 = 2.4539e-04
Loss = 2.5541e-03, PNorm = 151.6061, GNorm = 0.3448, lr_0 = 2.4522e-04
Loss = 3.1330e-03, PNorm = 151.6101, GNorm = 0.0808, lr_0 = 2.4505e-04
Loss = 2.1300e-03, PNorm = 151.6145, GNorm = 0.1543, lr_0 = 2.4488e-04
Loss = 2.3519e-03, PNorm = 151.6205, GNorm = 0.0584, lr_0 = 2.4472e-04
Loss = 2.4887e-03, PNorm = 151.6242, GNorm = 0.2071, lr_0 = 2.4455e-04
Loss = 3.7872e-03, PNorm = 151.6278, GNorm = 0.1300, lr_0 = 2.4438e-04
Loss = 3.7552e-03, PNorm = 151.6330, GNorm = 0.0841, lr_0 = 2.4421e-04
Loss = 2.7071e-03, PNorm = 151.6361, GNorm = 0.1685, lr_0 = 2.4405e-04
Loss = 4.0247e-03, PNorm = 151.6415, GNorm = 0.1849, lr_0 = 2.4388e-04
Loss = 2.6240e-03, PNorm = 151.6483, GNorm = 0.2693, lr_0 = 2.4371e-04
Loss = 2.7926e-03, PNorm = 151.6556, GNorm = 0.0923, lr_0 = 2.4354e-04
Loss = 2.4929e-03, PNorm = 151.6617, GNorm = 0.1946, lr_0 = 2.4338e-04
Loss = 4.1875e-03, PNorm = 151.6681, GNorm = 0.4399, lr_0 = 2.4321e-04
Loss = 3.3105e-03, PNorm = 151.6728, GNorm = 0.1650, lr_0 = 2.4304e-04
Loss = 2.6959e-03, PNorm = 151.6804, GNorm = 0.1988, lr_0 = 2.4288e-04
Loss = 3.1254e-03, PNorm = 151.6849, GNorm = 0.1901, lr_0 = 2.4271e-04
Loss = 2.6899e-03, PNorm = 151.6900, GNorm = 0.1527, lr_0 = 2.4254e-04
Loss = 3.7518e-03, PNorm = 151.6934, GNorm = 0.0932, lr_0 = 2.4238e-04
Loss = 3.0648e-03, PNorm = 151.6991, GNorm = 0.0865, lr_0 = 2.4221e-04
Loss = 2.6193e-03, PNorm = 151.7051, GNorm = 0.0606, lr_0 = 2.4205e-04
Loss = 3.2487e-03, PNorm = 151.7104, GNorm = 0.1953, lr_0 = 2.4188e-04
Loss = 2.8855e-03, PNorm = 151.7147, GNorm = 0.2429, lr_0 = 2.4171e-04
Loss = 2.8672e-03, PNorm = 151.7210, GNorm = 0.0674, lr_0 = 2.4155e-04
Loss = 2.6051e-03, PNorm = 151.7281, GNorm = 0.1395, lr_0 = 2.4138e-04
Loss = 2.2779e-03, PNorm = 151.7342, GNorm = 0.0741, lr_0 = 2.4122e-04
Loss = 4.0199e-03, PNorm = 151.7374, GNorm = 0.1630, lr_0 = 2.4105e-04
Loss = 2.4361e-03, PNorm = 151.7416, GNorm = 0.0595, lr_0 = 2.4089e-04
Loss = 2.5015e-03, PNorm = 151.7465, GNorm = 0.1018, lr_0 = 2.4072e-04
Loss = 2.3090e-03, PNorm = 151.7529, GNorm = 0.1684, lr_0 = 2.4056e-04
Loss = 2.9455e-03, PNorm = 151.7602, GNorm = 0.1607, lr_0 = 2.4039e-04
Loss = 2.9900e-03, PNorm = 151.7671, GNorm = 0.1425, lr_0 = 2.4023e-04
Loss = 2.3684e-03, PNorm = 151.7735, GNorm = 0.1283, lr_0 = 2.4006e-04
Loss = 3.5215e-03, PNorm = 151.7798, GNorm = 0.1042, lr_0 = 2.3990e-04
Loss = 4.2522e-03, PNorm = 151.7892, GNorm = 0.3418, lr_0 = 2.3974e-04
Loss = 3.0381e-03, PNorm = 151.7948, GNorm = 0.1080, lr_0 = 2.3957e-04
Loss = 2.5053e-03, PNorm = 151.8000, GNorm = 0.1830, lr_0 = 2.3941e-04
Loss = 2.4154e-03, PNorm = 151.8032, GNorm = 0.1009, lr_0 = 2.3924e-04
Loss = 3.0889e-03, PNorm = 151.8093, GNorm = 0.0872, lr_0 = 2.3908e-04
Loss = 2.4548e-03, PNorm = 151.8133, GNorm = 0.2250, lr_0 = 2.3892e-04
Loss = 2.1063e-03, PNorm = 151.8188, GNorm = 0.2350, lr_0 = 2.3875e-04
Loss = 2.2167e-03, PNorm = 151.8249, GNorm = 0.1157, lr_0 = 2.3859e-04
Loss = 2.5143e-03, PNorm = 151.8295, GNorm = 0.2250, lr_0 = 2.3842e-04
Loss = 3.0036e-03, PNorm = 151.8345, GNorm = 0.1966, lr_0 = 2.3826e-04
Loss = 3.2312e-03, PNorm = 151.8402, GNorm = 0.2504, lr_0 = 2.3810e-04
Loss = 3.5220e-03, PNorm = 151.8459, GNorm = 0.3664, lr_0 = 2.3794e-04
Loss = 2.2636e-03, PNorm = 151.8508, GNorm = 0.0763, lr_0 = 2.3777e-04
Loss = 2.1321e-03, PNorm = 151.8560, GNorm = 0.2927, lr_0 = 2.3761e-04
Loss = 3.0895e-03, PNorm = 151.8592, GNorm = 0.0919, lr_0 = 2.3745e-04
Loss = 2.1373e-03, PNorm = 151.8640, GNorm = 0.1240, lr_0 = 2.3728e-04
Loss = 3.7935e-03, PNorm = 151.8688, GNorm = 0.1169, lr_0 = 2.3712e-04
Loss = 2.2222e-03, PNorm = 151.8733, GNorm = 0.3037, lr_0 = 2.3696e-04
Loss = 4.6517e-03, PNorm = 151.8779, GNorm = 0.3713, lr_0 = 2.3680e-04
Loss = 7.0366e-03, PNorm = 151.8853, GNorm = 0.2964, lr_0 = 2.3663e-04
Loss = 2.4087e-03, PNorm = 151.8920, GNorm = 0.2570, lr_0 = 2.3647e-04
Loss = 2.6523e-03, PNorm = 151.8966, GNorm = 0.0565, lr_0 = 2.3631e-04
Loss = 2.3507e-03, PNorm = 151.9028, GNorm = 0.2789, lr_0 = 2.3615e-04
Loss = 2.6769e-03, PNorm = 151.9091, GNorm = 0.0998, lr_0 = 2.3599e-04
Loss = 3.0880e-03, PNorm = 151.9161, GNorm = 0.1033, lr_0 = 2.3582e-04
Loss = 3.0646e-03, PNorm = 151.9234, GNorm = 0.2222, lr_0 = 2.3566e-04
Loss = 2.9135e-03, PNorm = 151.9317, GNorm = 0.0638, lr_0 = 2.3550e-04
Loss = 2.8605e-03, PNorm = 151.9356, GNorm = 0.2258, lr_0 = 2.3534e-04
Loss = 2.5402e-03, PNorm = 151.9426, GNorm = 0.2128, lr_0 = 2.3518e-04
Loss = 3.5578e-03, PNorm = 151.9465, GNorm = 0.2651, lr_0 = 2.3502e-04
Loss = 3.2889e-03, PNorm = 151.9539, GNorm = 0.1904, lr_0 = 2.3486e-04
Loss = 2.4862e-03, PNorm = 151.9595, GNorm = 0.1437, lr_0 = 2.3470e-04
Loss = 2.7624e-03, PNorm = 151.9622, GNorm = 0.1790, lr_0 = 2.3454e-04
Loss = 2.0919e-03, PNorm = 151.9655, GNorm = 0.1725, lr_0 = 2.3437e-04
Loss = 3.9014e-03, PNorm = 151.9711, GNorm = 0.0660, lr_0 = 2.3421e-04
Loss = 2.0892e-03, PNorm = 151.9786, GNorm = 0.0888, lr_0 = 2.3405e-04
Loss = 2.2238e-03, PNorm = 151.9852, GNorm = 0.1503, lr_0 = 2.3389e-04
Loss = 5.3072e-03, PNorm = 151.9894, GNorm = 0.1177, lr_0 = 2.3373e-04
Loss = 3.3394e-03, PNorm = 151.9970, GNorm = 0.1178, lr_0 = 2.3357e-04
Loss = 2.6373e-03, PNorm = 152.0049, GNorm = 0.0754, lr_0 = 2.3341e-04
Loss = 2.1769e-03, PNorm = 152.0130, GNorm = 0.1707, lr_0 = 2.3325e-04
Loss = 2.2633e-03, PNorm = 152.0168, GNorm = 0.2952, lr_0 = 2.3309e-04
Loss = 3.2384e-03, PNorm = 152.0259, GNorm = 0.1455, lr_0 = 2.3293e-04
Loss = 2.9491e-03, PNorm = 152.0326, GNorm = 0.2102, lr_0 = 2.3277e-04
Loss = 2.7932e-03, PNorm = 152.0373, GNorm = 0.0753, lr_0 = 2.3261e-04
Loss = 2.4647e-03, PNorm = 152.0414, GNorm = 0.2296, lr_0 = 2.3246e-04
Loss = 2.3041e-03, PNorm = 152.0466, GNorm = 0.1795, lr_0 = 2.3230e-04
Loss = 4.0238e-03, PNorm = 152.0527, GNorm = 0.2486, lr_0 = 2.3214e-04
Loss = 4.5206e-03, PNorm = 152.0592, GNorm = 0.1761, lr_0 = 2.3198e-04
Loss = 5.7477e-03, PNorm = 152.0668, GNorm = 0.1757, lr_0 = 2.3182e-04
Loss = 2.0638e-03, PNorm = 152.0732, GNorm = 0.0617, lr_0 = 2.3166e-04
Loss = 2.3721e-03, PNorm = 152.0825, GNorm = 0.2761, lr_0 = 2.3150e-04
Loss = 1.9245e-03, PNorm = 152.0884, GNorm = 0.1923, lr_0 = 2.3134e-04
Loss = 2.6624e-03, PNorm = 152.0937, GNorm = 0.1455, lr_0 = 2.3118e-04
Loss = 2.5032e-03, PNorm = 152.0993, GNorm = 0.2416, lr_0 = 2.3103e-04
Loss = 2.9030e-03, PNorm = 152.1071, GNorm = 0.2801, lr_0 = 2.3087e-04
Loss = 5.7210e-03, PNorm = 152.1093, GNorm = 0.5587, lr_0 = 2.3071e-04
Loss = 3.2003e-03, PNorm = 152.1128, GNorm = 0.0915, lr_0 = 2.3055e-04
Loss = 3.1300e-03, PNorm = 152.1195, GNorm = 0.0826, lr_0 = 2.3039e-04
Loss = 1.9802e-03, PNorm = 152.1231, GNorm = 0.2689, lr_0 = 2.3024e-04
Loss = 2.3716e-03, PNorm = 152.1266, GNorm = 0.0466, lr_0 = 2.3008e-04
Loss = 3.0475e-03, PNorm = 152.1331, GNorm = 0.1378, lr_0 = 2.2992e-04
Loss = 4.3796e-03, PNorm = 152.1379, GNorm = 0.0481, lr_0 = 2.2976e-04
Loss = 2.5431e-03, PNorm = 152.1461, GNorm = 0.1418, lr_0 = 2.2961e-04
Loss = 4.8622e-03, PNorm = 152.1571, GNorm = 0.0507, lr_0 = 2.2945e-04
Loss = 4.1385e-03, PNorm = 152.1659, GNorm = 0.0667, lr_0 = 2.2929e-04
Loss = 2.7389e-03, PNorm = 152.1696, GNorm = 0.3565, lr_0 = 2.2913e-04
Loss = 2.2114e-03, PNorm = 152.1757, GNorm = 0.1472, lr_0 = 2.2898e-04
Loss = 5.2686e-03, PNorm = 152.1851, GNorm = 0.0499, lr_0 = 2.2882e-04
Loss = 2.6073e-03, PNorm = 152.1902, GNorm = 0.3425, lr_0 = 2.2866e-04
Loss = 3.7960e-03, PNorm = 152.1951, GNorm = 0.2578, lr_0 = 2.2851e-04
Loss = 2.3539e-03, PNorm = 152.1998, GNorm = 0.0486, lr_0 = 2.2835e-04
Loss = 3.0452e-03, PNorm = 152.2043, GNorm = 0.1534, lr_0 = 2.2819e-04
Loss = 3.3279e-03, PNorm = 152.2114, GNorm = 0.1282, lr_0 = 2.2804e-04
Loss = 3.9215e-03, PNorm = 152.2169, GNorm = 0.1377, lr_0 = 2.2788e-04
Loss = 2.3437e-03, PNorm = 152.2224, GNorm = 0.2186, lr_0 = 2.2773e-04
Loss = 3.5461e-03, PNorm = 152.2275, GNorm = 0.1257, lr_0 = 2.2757e-04
Validation mae = 0.475590
Epoch 20
Loss = 2.4206e-03, PNorm = 152.2318, GNorm = 0.1234, lr_0 = 2.2741e-04
Loss = 2.0873e-03, PNorm = 152.2371, GNorm = 0.1052, lr_0 = 2.2726e-04
Loss = 3.0831e-03, PNorm = 152.2424, GNorm = 0.3885, lr_0 = 2.2710e-04
Loss = 2.4166e-03, PNorm = 152.2465, GNorm = 0.0463, lr_0 = 2.2695e-04
Loss = 2.6478e-03, PNorm = 152.2515, GNorm = 0.1691, lr_0 = 2.2679e-04
Loss = 2.5614e-03, PNorm = 152.2542, GNorm = 0.1325, lr_0 = 2.2664e-04
Loss = 2.0419e-03, PNorm = 152.2558, GNorm = 0.1688, lr_0 = 2.2648e-04
Loss = 1.7520e-03, PNorm = 152.2595, GNorm = 0.2658, lr_0 = 2.2632e-04
Loss = 3.4130e-03, PNorm = 152.2635, GNorm = 0.1383, lr_0 = 2.2617e-04
Loss = 2.9147e-03, PNorm = 152.2688, GNorm = 0.1398, lr_0 = 2.2601e-04
Loss = 2.9483e-03, PNorm = 152.2720, GNorm = 0.0821, lr_0 = 2.2586e-04
Loss = 3.0858e-03, PNorm = 152.2768, GNorm = 0.2852, lr_0 = 2.2571e-04
Loss = 2.0037e-03, PNorm = 152.2805, GNorm = 0.1323, lr_0 = 2.2555e-04
Loss = 2.4079e-03, PNorm = 152.2837, GNorm = 0.1170, lr_0 = 2.2540e-04
Loss = 2.5281e-03, PNorm = 152.2881, GNorm = 0.1438, lr_0 = 2.2524e-04
Loss = 2.4673e-03, PNorm = 152.2909, GNorm = 0.1242, lr_0 = 2.2509e-04
Loss = 2.2776e-03, PNorm = 152.2970, GNorm = 0.1940, lr_0 = 2.2493e-04
Loss = 1.8993e-03, PNorm = 152.3022, GNorm = 0.1716, lr_0 = 2.2478e-04
Loss = 1.7502e-03, PNorm = 152.3077, GNorm = 0.1660, lr_0 = 2.2463e-04
Loss = 2.0207e-03, PNorm = 152.3102, GNorm = 0.2380, lr_0 = 2.2447e-04
Loss = 2.5194e-03, PNorm = 152.3129, GNorm = 0.0508, lr_0 = 2.2432e-04
Loss = 2.1720e-03, PNorm = 152.3149, GNorm = 0.0876, lr_0 = 2.2416e-04
Loss = 2.1370e-03, PNorm = 152.3176, GNorm = 0.0321, lr_0 = 2.2401e-04
Loss = 1.8938e-03, PNorm = 152.3195, GNorm = 0.1969, lr_0 = 2.2386e-04
Loss = 1.7435e-03, PNorm = 152.3255, GNorm = 0.1804, lr_0 = 2.2370e-04
Loss = 2.3105e-03, PNorm = 152.3297, GNorm = 0.1965, lr_0 = 2.2355e-04
Loss = 1.7615e-03, PNorm = 152.3335, GNorm = 0.0592, lr_0 = 2.2340e-04
Loss = 3.4222e-03, PNorm = 152.3386, GNorm = 0.1729, lr_0 = 2.2324e-04
Loss = 2.1688e-03, PNorm = 152.3454, GNorm = 0.0500, lr_0 = 2.2309e-04
Loss = 2.1516e-03, PNorm = 152.3475, GNorm = 0.2354, lr_0 = 2.2294e-04
Loss = 1.9934e-03, PNorm = 152.3505, GNorm = 0.0538, lr_0 = 2.2279e-04
Loss = 2.3225e-03, PNorm = 152.3535, GNorm = 0.1853, lr_0 = 2.2263e-04
Loss = 2.2014e-03, PNorm = 152.3575, GNorm = 0.1431, lr_0 = 2.2248e-04
Loss = 1.8973e-03, PNorm = 152.3627, GNorm = 0.1061, lr_0 = 2.2233e-04
Loss = 3.5487e-03, PNorm = 152.3701, GNorm = 0.1203, lr_0 = 2.2218e-04
Loss = 4.7383e-03, PNorm = 152.3741, GNorm = 0.1750, lr_0 = 2.2202e-04
Loss = 2.2517e-03, PNorm = 152.3783, GNorm = 0.2557, lr_0 = 2.2187e-04
Loss = 2.3848e-03, PNorm = 152.3829, GNorm = 0.3923, lr_0 = 2.2172e-04
Loss = 2.7579e-03, PNorm = 152.3894, GNorm = 0.1303, lr_0 = 2.2157e-04
Loss = 2.4380e-03, PNorm = 152.3936, GNorm = 0.0469, lr_0 = 2.2142e-04
Loss = 2.2422e-03, PNorm = 152.3979, GNorm = 0.1789, lr_0 = 2.2126e-04
Loss = 2.9439e-03, PNorm = 152.4033, GNorm = 0.1167, lr_0 = 2.2111e-04
Loss = 1.8889e-03, PNorm = 152.4092, GNorm = 0.1727, lr_0 = 2.2096e-04
Loss = 3.0917e-03, PNorm = 152.4141, GNorm = 0.1962, lr_0 = 2.2081e-04
Loss = 2.3035e-03, PNorm = 152.4169, GNorm = 0.2475, lr_0 = 2.2066e-04
Loss = 3.3965e-03, PNorm = 152.4198, GNorm = 0.1752, lr_0 = 2.2051e-04
Loss = 2.3400e-03, PNorm = 152.4232, GNorm = 0.1051, lr_0 = 2.2036e-04
Loss = 2.4379e-03, PNorm = 152.4283, GNorm = 0.0362, lr_0 = 2.2021e-04
Loss = 1.9545e-03, PNorm = 152.4318, GNorm = 0.0543, lr_0 = 2.2005e-04
Loss = 5.6898e-03, PNorm = 152.4352, GNorm = 0.0816, lr_0 = 2.1990e-04
Loss = 2.0546e-03, PNorm = 152.4380, GNorm = 0.2424, lr_0 = 2.1975e-04
Loss = 3.7677e-03, PNorm = 152.4441, GNorm = 0.1437, lr_0 = 2.1960e-04
Loss = 2.8109e-03, PNorm = 152.4497, GNorm = 0.0638, lr_0 = 2.1945e-04
Loss = 2.8662e-03, PNorm = 152.4543, GNorm = 0.0692, lr_0 = 2.1930e-04
Loss = 3.1733e-03, PNorm = 152.4575, GNorm = 0.1033, lr_0 = 2.1915e-04
Loss = 2.1660e-03, PNorm = 152.4620, GNorm = 0.1521, lr_0 = 2.1900e-04
Loss = 2.1730e-03, PNorm = 152.4657, GNorm = 0.0960, lr_0 = 2.1885e-04
Loss = 4.1105e-03, PNorm = 152.4687, GNorm = 0.1729, lr_0 = 2.1870e-04
Loss = 5.0946e-03, PNorm = 152.4750, GNorm = 0.2603, lr_0 = 2.1855e-04
Loss = 1.8451e-03, PNorm = 152.4817, GNorm = 0.0536, lr_0 = 2.1840e-04
Loss = 2.3929e-03, PNorm = 152.4891, GNorm = 0.0647, lr_0 = 2.1825e-04
Loss = 1.9306e-03, PNorm = 152.4925, GNorm = 0.1048, lr_0 = 2.1810e-04
Loss = 3.0438e-03, PNorm = 152.4968, GNorm = 0.0436, lr_0 = 2.1795e-04
Loss = 2.5527e-03, PNorm = 152.5032, GNorm = 0.1436, lr_0 = 2.1780e-04
Loss = 2.6904e-03, PNorm = 152.5073, GNorm = 0.0849, lr_0 = 2.1765e-04
Loss = 1.6829e-03, PNorm = 152.5138, GNorm = 0.1021, lr_0 = 2.1751e-04
Loss = 1.5680e-03, PNorm = 152.5183, GNorm = 0.1091, lr_0 = 2.1736e-04
Loss = 2.5643e-03, PNorm = 152.5221, GNorm = 0.2049, lr_0 = 2.1721e-04
Loss = 3.4532e-03, PNorm = 152.5265, GNorm = 0.0874, lr_0 = 2.1706e-04
Loss = 2.4235e-03, PNorm = 152.5316, GNorm = 0.1789, lr_0 = 2.1691e-04
Loss = 3.4047e-03, PNorm = 152.5379, GNorm = 0.3244, lr_0 = 2.1676e-04
Loss = 2.0448e-03, PNorm = 152.5436, GNorm = 0.1485, lr_0 = 2.1661e-04
Loss = 1.6636e-03, PNorm = 152.5475, GNorm = 0.0936, lr_0 = 2.1646e-04
Loss = 2.7313e-03, PNorm = 152.5544, GNorm = 0.1158, lr_0 = 2.1632e-04
Loss = 2.9451e-03, PNorm = 152.5586, GNorm = 0.1742, lr_0 = 2.1617e-04
Loss = 2.0773e-03, PNorm = 152.5616, GNorm = 0.0737, lr_0 = 2.1602e-04
Loss = 1.9072e-03, PNorm = 152.5641, GNorm = 0.0747, lr_0 = 2.1587e-04
Loss = 3.1241e-03, PNorm = 152.5652, GNorm = 0.0531, lr_0 = 2.1572e-04
Loss = 3.1655e-03, PNorm = 152.5692, GNorm = 0.1579, lr_0 = 2.1558e-04
Loss = 3.0422e-03, PNorm = 152.5755, GNorm = 0.1268, lr_0 = 2.1543e-04
Loss = 2.0916e-03, PNorm = 152.5808, GNorm = 0.1053, lr_0 = 2.1528e-04
Loss = 1.9154e-03, PNorm = 152.5858, GNorm = 0.2000, lr_0 = 2.1513e-04
Loss = 1.9372e-03, PNorm = 152.5895, GNorm = 0.0427, lr_0 = 2.1499e-04
Loss = 2.3195e-03, PNorm = 152.5923, GNorm = 0.1816, lr_0 = 2.1484e-04
Loss = 3.0854e-03, PNorm = 152.5974, GNorm = 0.2621, lr_0 = 2.1469e-04
Loss = 2.4755e-03, PNorm = 152.6038, GNorm = 0.1112, lr_0 = 2.1454e-04
Loss = 1.7810e-03, PNorm = 152.6094, GNorm = 0.1069, lr_0 = 2.1440e-04
Loss = 3.9120e-03, PNorm = 152.6151, GNorm = 0.1336, lr_0 = 2.1425e-04
Loss = 3.7866e-03, PNorm = 152.6201, GNorm = 0.0975, lr_0 = 2.1410e-04
Loss = 1.7728e-03, PNorm = 152.6263, GNorm = 0.0579, lr_0 = 2.1396e-04
Loss = 2.0618e-03, PNorm = 152.6299, GNorm = 0.2241, lr_0 = 2.1381e-04
Loss = 5.9442e-03, PNorm = 152.6334, GNorm = 0.0744, lr_0 = 2.1366e-04
Loss = 2.1940e-03, PNorm = 152.6382, GNorm = 0.2444, lr_0 = 2.1352e-04
Loss = 5.6798e-03, PNorm = 152.6448, GNorm = 0.4167, lr_0 = 2.1337e-04
Loss = 2.4222e-03, PNorm = 152.6495, GNorm = 0.1864, lr_0 = 2.1323e-04
Loss = 4.2983e-03, PNorm = 152.6530, GNorm = 0.0758, lr_0 = 2.1308e-04
Loss = 2.5836e-03, PNorm = 152.6551, GNorm = 0.0558, lr_0 = 2.1293e-04
Loss = 1.8324e-03, PNorm = 152.6610, GNorm = 0.0942, lr_0 = 2.1279e-04
Loss = 2.8952e-03, PNorm = 152.6659, GNorm = 0.1630, lr_0 = 2.1264e-04
Loss = 4.0708e-03, PNorm = 152.6706, GNorm = 0.0994, lr_0 = 2.1250e-04
Loss = 2.2100e-03, PNorm = 152.6744, GNorm = 0.0997, lr_0 = 2.1235e-04
Loss = 3.7222e-03, PNorm = 152.6768, GNorm = 0.0763, lr_0 = 2.1221e-04
Loss = 2.7490e-03, PNorm = 152.6803, GNorm = 0.1870, lr_0 = 2.1206e-04
Loss = 2.7781e-03, PNorm = 152.6831, GNorm = 0.1927, lr_0 = 2.1191e-04
Loss = 2.0852e-03, PNorm = 152.6893, GNorm = 0.0451, lr_0 = 2.1177e-04
Loss = 3.0646e-03, PNorm = 152.6930, GNorm = 0.2171, lr_0 = 2.1162e-04
Loss = 2.9722e-03, PNorm = 152.6974, GNorm = 0.1385, lr_0 = 2.1148e-04
Loss = 2.3660e-03, PNorm = 152.7021, GNorm = 0.1309, lr_0 = 2.1133e-04
Loss = 2.1180e-03, PNorm = 152.7062, GNorm = 0.1409, lr_0 = 2.1119e-04
Loss = 2.5751e-03, PNorm = 152.7096, GNorm = 0.1812, lr_0 = 2.1104e-04
Loss = 1.7866e-03, PNorm = 152.7159, GNorm = 0.2861, lr_0 = 2.1090e-04
Loss = 1.9789e-03, PNorm = 152.7226, GNorm = 0.1232, lr_0 = 2.1076e-04
Loss = 2.5498e-03, PNorm = 152.7269, GNorm = 0.0737, lr_0 = 2.1061e-04
Loss = 1.6774e-03, PNorm = 152.7328, GNorm = 0.0411, lr_0 = 2.1047e-04
Loss = 2.1826e-03, PNorm = 152.7374, GNorm = 0.0854, lr_0 = 2.1032e-04
Loss = 4.4688e-03, PNorm = 152.7434, GNorm = 0.1483, lr_0 = 2.1018e-04
Loss = 2.5220e-03, PNorm = 152.7497, GNorm = 0.1492, lr_0 = 2.1003e-04
Loss = 1.6314e-03, PNorm = 152.7524, GNorm = 0.0413, lr_0 = 2.0989e-04
Loss = 2.1436e-03, PNorm = 152.7573, GNorm = 0.1840, lr_0 = 2.0975e-04
Loss = 1.7592e-03, PNorm = 152.7628, GNorm = 0.2110, lr_0 = 2.0960e-04
Validation mae = 0.475982
Epoch 21
Loss = 2.0650e-03, PNorm = 152.7647, GNorm = 0.0523, lr_0 = 2.0946e-04
Loss = 1.5334e-03, PNorm = 152.7680, GNorm = 0.1249, lr_0 = 2.0932e-04
Loss = 2.0706e-03, PNorm = 152.7715, GNorm = 0.2336, lr_0 = 2.0917e-04
Loss = 2.6185e-03, PNorm = 152.7741, GNorm = 0.1229, lr_0 = 2.0903e-04
Loss = 1.6363e-03, PNorm = 152.7755, GNorm = 0.1644, lr_0 = 2.0889e-04
Loss = 1.4834e-03, PNorm = 152.7781, GNorm = 0.2566, lr_0 = 2.0874e-04
Loss = 2.2589e-03, PNorm = 152.7813, GNorm = 0.0940, lr_0 = 2.0860e-04
Loss = 2.5916e-03, PNorm = 152.7868, GNorm = 0.1550, lr_0 = 2.0846e-04
Loss = 2.3112e-03, PNorm = 152.7918, GNorm = 0.1443, lr_0 = 2.0831e-04
Loss = 1.7905e-03, PNorm = 152.7958, GNorm = 0.2115, lr_0 = 2.0817e-04
Loss = 2.7671e-03, PNorm = 152.7994, GNorm = 0.1693, lr_0 = 2.0803e-04
Loss = 1.6955e-03, PNorm = 152.8048, GNorm = 0.2025, lr_0 = 2.0789e-04
Loss = 3.1786e-03, PNorm = 152.8073, GNorm = 0.2202, lr_0 = 2.0774e-04
Loss = 1.8124e-03, PNorm = 152.8094, GNorm = 0.1770, lr_0 = 2.0760e-04
Loss = 2.2980e-03, PNorm = 152.8148, GNorm = 0.1738, lr_0 = 2.0746e-04
Loss = 1.7401e-03, PNorm = 152.8165, GNorm = 0.1584, lr_0 = 2.0732e-04
Loss = 2.6802e-03, PNorm = 152.8197, GNorm = 0.1593, lr_0 = 2.0718e-04
Loss = 2.7628e-03, PNorm = 152.8235, GNorm = 0.1787, lr_0 = 2.0703e-04
Loss = 3.0939e-03, PNorm = 152.8280, GNorm = 0.2162, lr_0 = 2.0689e-04
Loss = 2.4756e-03, PNorm = 152.8335, GNorm = 0.0679, lr_0 = 2.0675e-04
Loss = 1.8835e-03, PNorm = 152.8374, GNorm = 0.0870, lr_0 = 2.0661e-04
Loss = 1.6211e-03, PNorm = 152.8426, GNorm = 0.1195, lr_0 = 2.0647e-04
Loss = 1.9712e-03, PNorm = 152.8469, GNorm = 0.2264, lr_0 = 2.0633e-04
Loss = 1.7489e-03, PNorm = 152.8484, GNorm = 0.0935, lr_0 = 2.0618e-04
Loss = 2.3278e-03, PNorm = 152.8537, GNorm = 0.1502, lr_0 = 2.0604e-04
Loss = 2.8220e-03, PNorm = 152.8580, GNorm = 0.1608, lr_0 = 2.0590e-04
Loss = 1.7674e-03, PNorm = 152.8621, GNorm = 0.2551, lr_0 = 2.0576e-04
Loss = 2.4485e-03, PNorm = 152.8669, GNorm = 0.1079, lr_0 = 2.0562e-04
Loss = 1.9219e-03, PNorm = 152.8691, GNorm = 0.1043, lr_0 = 2.0548e-04
Loss = 1.7845e-03, PNorm = 152.8718, GNorm = 0.1038, lr_0 = 2.0534e-04
Loss = 3.1265e-03, PNorm = 152.8782, GNorm = 0.1466, lr_0 = 2.0520e-04
Loss = 3.2823e-03, PNorm = 152.8826, GNorm = 0.0664, lr_0 = 2.0506e-04
Loss = 2.2818e-03, PNorm = 152.8864, GNorm = 0.0701, lr_0 = 2.0492e-04
Loss = 2.4470e-03, PNorm = 152.8923, GNorm = 0.2366, lr_0 = 2.0478e-04
Loss = 2.1771e-03, PNorm = 152.8975, GNorm = 0.0685, lr_0 = 2.0464e-04
Loss = 2.2679e-03, PNorm = 152.9023, GNorm = 0.1056, lr_0 = 2.0450e-04
Loss = 1.7952e-03, PNorm = 152.9057, GNorm = 0.1774, lr_0 = 2.0436e-04
Loss = 1.5135e-03, PNorm = 152.9092, GNorm = 0.1205, lr_0 = 2.0422e-04
Loss = 1.6756e-03, PNorm = 152.9130, GNorm = 0.1114, lr_0 = 2.0408e-04
Loss = 2.2170e-03, PNorm = 152.9173, GNorm = 0.0428, lr_0 = 2.0394e-04
Loss = 1.9138e-03, PNorm = 152.9224, GNorm = 0.1051, lr_0 = 2.0380e-04
Loss = 1.4848e-03, PNorm = 152.9265, GNorm = 0.0952, lr_0 = 2.0366e-04
Loss = 1.7754e-03, PNorm = 152.9287, GNorm = 0.0731, lr_0 = 2.0352e-04
Loss = 1.9014e-03, PNorm = 152.9313, GNorm = 0.1125, lr_0 = 2.0338e-04
Loss = 2.0133e-03, PNorm = 152.9362, GNorm = 0.0872, lr_0 = 2.0324e-04
Loss = 2.0947e-03, PNorm = 152.9388, GNorm = 0.1702, lr_0 = 2.0310e-04
Loss = 2.2682e-03, PNorm = 152.9425, GNorm = 0.1013, lr_0 = 2.0296e-04
Loss = 2.7050e-03, PNorm = 152.9441, GNorm = 0.0636, lr_0 = 2.0282e-04
Loss = 2.9114e-03, PNorm = 152.9473, GNorm = 0.4807, lr_0 = 2.0268e-04
Loss = 2.4136e-03, PNorm = 152.9501, GNorm = 0.1548, lr_0 = 2.0254e-04
Loss = 3.0686e-03, PNorm = 152.9547, GNorm = 0.0793, lr_0 = 2.0240e-04
Loss = 2.4679e-03, PNorm = 152.9605, GNorm = 0.0520, lr_0 = 2.0227e-04
Loss = 1.4997e-03, PNorm = 152.9660, GNorm = 0.0793, lr_0 = 2.0213e-04
Loss = 3.1011e-03, PNorm = 152.9699, GNorm = 0.1699, lr_0 = 2.0199e-04
Loss = 1.8792e-03, PNorm = 152.9739, GNorm = 0.1400, lr_0 = 2.0185e-04
Loss = 3.3903e-03, PNorm = 152.9790, GNorm = 0.2254, lr_0 = 2.0171e-04
Loss = 1.8760e-03, PNorm = 152.9831, GNorm = 0.1131, lr_0 = 2.0157e-04
Loss = 3.9659e-03, PNorm = 152.9859, GNorm = 0.1703, lr_0 = 2.0144e-04
Loss = 2.0148e-03, PNorm = 152.9908, GNorm = 0.1244, lr_0 = 2.0130e-04
Loss = 1.9466e-03, PNorm = 152.9973, GNorm = 0.0695, lr_0 = 2.0116e-04
Loss = 1.6265e-03, PNorm = 153.0002, GNorm = 0.1915, lr_0 = 2.0102e-04
Loss = 1.6845e-03, PNorm = 153.0048, GNorm = 0.1952, lr_0 = 2.0088e-04
Loss = 1.5386e-03, PNorm = 153.0096, GNorm = 0.0759, lr_0 = 2.0075e-04
Loss = 1.7571e-03, PNorm = 153.0134, GNorm = 0.1105, lr_0 = 2.0061e-04
Loss = 2.0485e-03, PNorm = 153.0174, GNorm = 0.0697, lr_0 = 2.0047e-04
Loss = 1.5961e-03, PNorm = 153.0217, GNorm = 0.2417, lr_0 = 2.0033e-04
Loss = 1.5608e-03, PNorm = 153.0260, GNorm = 0.0950, lr_0 = 2.0020e-04
Loss = 1.9195e-03, PNorm = 153.0310, GNorm = 0.2065, lr_0 = 2.0006e-04
Loss = 3.7123e-03, PNorm = 153.0351, GNorm = 0.1316, lr_0 = 1.9992e-04
Loss = 2.1935e-03, PNorm = 153.0403, GNorm = 0.0383, lr_0 = 1.9979e-04
Loss = 3.4182e-03, PNorm = 153.0456, GNorm = 0.1233, lr_0 = 1.9965e-04
Loss = 1.5733e-03, PNorm = 153.0484, GNorm = 0.1155, lr_0 = 1.9951e-04
Loss = 2.1254e-03, PNorm = 153.0530, GNorm = 0.0905, lr_0 = 1.9938e-04
Loss = 2.7527e-03, PNorm = 153.0598, GNorm = 0.0926, lr_0 = 1.9924e-04
Loss = 1.7992e-03, PNorm = 153.0638, GNorm = 0.0904, lr_0 = 1.9910e-04
Loss = 2.6606e-03, PNorm = 153.0685, GNorm = 0.1295, lr_0 = 1.9897e-04
Loss = 2.6106e-03, PNorm = 153.0712, GNorm = 0.0478, lr_0 = 1.9883e-04
Loss = 4.2111e-03, PNorm = 153.0746, GNorm = 0.1266, lr_0 = 1.9869e-04
Loss = 2.3725e-03, PNorm = 153.0800, GNorm = 0.1389, lr_0 = 1.9856e-04
Loss = 3.0105e-03, PNorm = 153.0863, GNorm = 0.0682, lr_0 = 1.9842e-04
Loss = 5.6705e-03, PNorm = 153.0905, GNorm = 0.1789, lr_0 = 1.9829e-04
Loss = 6.1297e-03, PNorm = 153.0934, GNorm = 1.3124, lr_0 = 1.9815e-04
Loss = 2.0260e-03, PNorm = 153.0965, GNorm = 0.2095, lr_0 = 1.9801e-04
Loss = 2.6769e-03, PNorm = 153.1017, GNorm = 0.1685, lr_0 = 1.9788e-04
Loss = 1.6935e-03, PNorm = 153.1087, GNorm = 0.1663, lr_0 = 1.9774e-04
Loss = 1.8782e-03, PNorm = 153.1127, GNorm = 0.1045, lr_0 = 1.9761e-04
Loss = 1.9096e-03, PNorm = 153.1193, GNorm = 0.1185, lr_0 = 1.9747e-04
Loss = 2.0517e-03, PNorm = 153.1240, GNorm = 0.0963, lr_0 = 1.9734e-04
Loss = 1.6812e-03, PNorm = 153.1293, GNorm = 0.0425, lr_0 = 1.9720e-04
Loss = 1.9500e-03, PNorm = 153.1350, GNorm = 0.1695, lr_0 = 1.9707e-04
Loss = 1.6070e-03, PNorm = 153.1397, GNorm = 0.1180, lr_0 = 1.9693e-04
Loss = 1.5291e-03, PNorm = 153.1412, GNorm = 0.1398, lr_0 = 1.9680e-04
Loss = 2.5060e-03, PNorm = 153.1446, GNorm = 0.1276, lr_0 = 1.9666e-04
Loss = 2.3736e-03, PNorm = 153.1495, GNorm = 0.1853, lr_0 = 1.9653e-04
Loss = 1.7145e-03, PNorm = 153.1533, GNorm = 0.1633, lr_0 = 1.9639e-04
Loss = 1.6871e-03, PNorm = 153.1591, GNorm = 0.1427, lr_0 = 1.9626e-04
Loss = 2.0246e-03, PNorm = 153.1613, GNorm = 0.1026, lr_0 = 1.9612e-04
Loss = 2.8580e-03, PNorm = 153.1638, GNorm = 0.0978, lr_0 = 1.9599e-04
Loss = 1.5305e-03, PNorm = 153.1675, GNorm = 0.1127, lr_0 = 1.9585e-04
Loss = 1.8241e-03, PNorm = 153.1732, GNorm = 0.1383, lr_0 = 1.9572e-04
Loss = 1.9278e-03, PNorm = 153.1758, GNorm = 0.2407, lr_0 = 1.9559e-04
Loss = 2.0228e-03, PNorm = 153.1792, GNorm = 0.0483, lr_0 = 1.9545e-04
Loss = 5.2009e-03, PNorm = 153.1829, GNorm = 0.2889, lr_0 = 1.9532e-04
Loss = 2.5993e-03, PNorm = 153.1863, GNorm = 0.0568, lr_0 = 1.9518e-04
Loss = 1.5193e-03, PNorm = 153.1901, GNorm = 0.1214, lr_0 = 1.9505e-04
Loss = 2.3921e-03, PNorm = 153.1936, GNorm = 0.0834, lr_0 = 1.9492e-04
Loss = 1.5857e-03, PNorm = 153.1982, GNorm = 0.1344, lr_0 = 1.9478e-04
Loss = 2.2155e-03, PNorm = 153.1996, GNorm = 0.1238, lr_0 = 1.9465e-04
Loss = 4.1095e-03, PNorm = 153.2043, GNorm = 0.2377, lr_0 = 1.9452e-04
Loss = 2.8336e-03, PNorm = 153.2103, GNorm = 0.0658, lr_0 = 1.9438e-04
Loss = 1.6181e-03, PNorm = 153.2156, GNorm = 0.0969, lr_0 = 1.9425e-04
Loss = 2.4557e-03, PNorm = 153.2176, GNorm = 0.1551, lr_0 = 1.9412e-04
Loss = 3.3637e-03, PNorm = 153.2208, GNorm = 0.0641, lr_0 = 1.9398e-04
Loss = 3.3492e-03, PNorm = 153.2230, GNorm = 0.5542, lr_0 = 1.9385e-04
Loss = 1.8171e-03, PNorm = 153.2256, GNorm = 0.2042, lr_0 = 1.9372e-04
Loss = 1.7597e-03, PNorm = 153.2312, GNorm = 0.1666, lr_0 = 1.9359e-04
Loss = 3.0023e-03, PNorm = 153.2368, GNorm = 0.0819, lr_0 = 1.9345e-04
Loss = 3.9199e-03, PNorm = 153.2416, GNorm = 0.2419, lr_0 = 1.9332e-04
Loss = 3.1172e-03, PNorm = 153.2485, GNorm = 0.0941, lr_0 = 1.9319e-04
Loss = 1.6556e-03, PNorm = 153.2524, GNorm = 0.0730, lr_0 = 1.9306e-04
Validation mae = 0.475803
Epoch 22
Loss = 1.7061e-03, PNorm = 153.2542, GNorm = 0.1040, lr_0 = 1.9292e-04
Loss = 2.8899e-03, PNorm = 153.2566, GNorm = 0.1264, lr_0 = 1.9279e-04
Loss = 2.0258e-03, PNorm = 153.2601, GNorm = 0.1242, lr_0 = 1.9266e-04
Loss = 1.5605e-03, PNorm = 153.2629, GNorm = 0.1450, lr_0 = 1.9253e-04
Loss = 2.4149e-03, PNorm = 153.2639, GNorm = 0.3119, lr_0 = 1.9240e-04
Loss = 2.5285e-03, PNorm = 153.2615, GNorm = 0.1095, lr_0 = 1.9226e-04
Loss = 1.8757e-03, PNorm = 153.2620, GNorm = 0.0865, lr_0 = 1.9213e-04
Loss = 1.5294e-03, PNorm = 153.2658, GNorm = 0.1305, lr_0 = 1.9200e-04
Loss = 4.0454e-03, PNorm = 153.2692, GNorm = 0.0417, lr_0 = 1.9187e-04
Loss = 1.8540e-03, PNorm = 153.2758, GNorm = 0.0550, lr_0 = 1.9174e-04
Loss = 2.5286e-03, PNorm = 153.2809, GNorm = 0.3027, lr_0 = 1.9161e-04
Loss = 2.2073e-03, PNorm = 153.2843, GNorm = 0.2318, lr_0 = 1.9148e-04
Loss = 1.4425e-03, PNorm = 153.2871, GNorm = 0.1145, lr_0 = 1.9134e-04
Loss = 1.4618e-03, PNorm = 153.2900, GNorm = 0.1039, lr_0 = 1.9121e-04
Loss = 1.8077e-03, PNorm = 153.2951, GNorm = 0.1106, lr_0 = 1.9108e-04
Loss = 1.3120e-03, PNorm = 153.2987, GNorm = 0.0720, lr_0 = 1.9095e-04
Loss = 1.8099e-03, PNorm = 153.3007, GNorm = 0.1082, lr_0 = 1.9082e-04
Loss = 2.1556e-03, PNorm = 153.3027, GNorm = 0.0996, lr_0 = 1.9069e-04
Loss = 3.3862e-03, PNorm = 153.3028, GNorm = 0.0848, lr_0 = 1.9056e-04
Loss = 2.1500e-03, PNorm = 153.3067, GNorm = 0.2575, lr_0 = 1.9043e-04
Loss = 1.7571e-03, PNorm = 153.3098, GNorm = 0.1694, lr_0 = 1.9030e-04
Loss = 1.6582e-03, PNorm = 153.3135, GNorm = 0.1055, lr_0 = 1.9017e-04
Loss = 2.0743e-03, PNorm = 153.3177, GNorm = 0.1234, lr_0 = 1.9004e-04
Loss = 3.7289e-03, PNorm = 153.3217, GNorm = 0.0786, lr_0 = 1.8991e-04
Loss = 2.1586e-03, PNorm = 153.3255, GNorm = 0.1104, lr_0 = 1.8978e-04
Loss = 2.0596e-03, PNorm = 153.3296, GNorm = 0.3520, lr_0 = 1.8965e-04
Loss = 1.8101e-03, PNorm = 153.3305, GNorm = 0.3607, lr_0 = 1.8952e-04
Loss = 1.7732e-03, PNorm = 153.3340, GNorm = 0.3768, lr_0 = 1.8939e-04
Loss = 1.6173e-03, PNorm = 153.3374, GNorm = 0.0269, lr_0 = 1.8926e-04
Loss = 1.7111e-03, PNorm = 153.3402, GNorm = 0.1497, lr_0 = 1.8913e-04
Loss = 2.0896e-03, PNorm = 153.3433, GNorm = 0.1526, lr_0 = 1.8900e-04
Loss = 1.3022e-03, PNorm = 153.3486, GNorm = 0.0472, lr_0 = 1.8887e-04
Loss = 1.2863e-03, PNorm = 153.3513, GNorm = 0.1033, lr_0 = 1.8874e-04
Loss = 3.2149e-03, PNorm = 153.3554, GNorm = 0.1687, lr_0 = 1.8861e-04
Loss = 1.6483e-03, PNorm = 153.3572, GNorm = 0.2909, lr_0 = 1.8848e-04
Loss = 1.5002e-03, PNorm = 153.3611, GNorm = 0.0901, lr_0 = 1.8835e-04
Loss = 1.5495e-03, PNorm = 153.3644, GNorm = 0.1228, lr_0 = 1.8822e-04
Loss = 1.2705e-03, PNorm = 153.3678, GNorm = 0.0635, lr_0 = 1.8809e-04
Loss = 2.8612e-03, PNorm = 153.3714, GNorm = 0.0984, lr_0 = 1.8797e-04
Loss = 2.0675e-03, PNorm = 153.3753, GNorm = 0.1719, lr_0 = 1.8784e-04
Loss = 2.9814e-03, PNorm = 153.3783, GNorm = 0.1687, lr_0 = 1.8771e-04
Loss = 2.8570e-03, PNorm = 153.3829, GNorm = 0.1873, lr_0 = 1.8758e-04
Loss = 1.3441e-03, PNorm = 153.3849, GNorm = 0.0771, lr_0 = 1.8745e-04
Loss = 1.9954e-03, PNorm = 153.3882, GNorm = 0.0799, lr_0 = 1.8732e-04
Loss = 2.2457e-03, PNorm = 153.3900, GNorm = 0.1174, lr_0 = 1.8719e-04
Loss = 1.6347e-03, PNorm = 153.3957, GNorm = 0.1419, lr_0 = 1.8707e-04
Loss = 1.5517e-03, PNorm = 153.3990, GNorm = 0.0895, lr_0 = 1.8694e-04
Loss = 2.4542e-03, PNorm = 153.4016, GNorm = 0.1408, lr_0 = 1.8681e-04
Loss = 2.0099e-03, PNorm = 153.4056, GNorm = 0.1527, lr_0 = 1.8668e-04
Loss = 1.3330e-03, PNorm = 153.4083, GNorm = 0.1920, lr_0 = 1.8655e-04
Loss = 1.2084e-03, PNorm = 153.4104, GNorm = 0.0326, lr_0 = 1.8643e-04
Loss = 2.5131e-03, PNorm = 153.4128, GNorm = 0.2482, lr_0 = 1.8630e-04
Loss = 2.3074e-03, PNorm = 153.4142, GNorm = 0.0877, lr_0 = 1.8617e-04
Loss = 2.1174e-03, PNorm = 153.4165, GNorm = 0.0351, lr_0 = 1.8604e-04
Loss = 2.8670e-03, PNorm = 153.4198, GNorm = 0.0378, lr_0 = 1.8592e-04
Loss = 2.0320e-03, PNorm = 153.4229, GNorm = 0.1305, lr_0 = 1.8579e-04
Loss = 1.4836e-03, PNorm = 153.4248, GNorm = 0.0783, lr_0 = 1.8566e-04
Loss = 2.0945e-03, PNorm = 153.4248, GNorm = 0.0994, lr_0 = 1.8553e-04
Loss = 1.4256e-03, PNorm = 153.4263, GNorm = 0.0699, lr_0 = 1.8541e-04
Loss = 1.5798e-03, PNorm = 153.4283, GNorm = 0.0321, lr_0 = 1.8528e-04
Loss = 2.7937e-03, PNorm = 153.4309, GNorm = 0.0824, lr_0 = 1.8515e-04
Loss = 2.8596e-03, PNorm = 153.4355, GNorm = 0.0910, lr_0 = 1.8503e-04
Loss = 4.3510e-03, PNorm = 153.4392, GNorm = 0.1446, lr_0 = 1.8490e-04
Loss = 3.9390e-03, PNorm = 153.4415, GNorm = 0.1594, lr_0 = 1.8477e-04
Loss = 2.2312e-03, PNorm = 153.4470, GNorm = 0.0832, lr_0 = 1.8465e-04
Loss = 2.5234e-03, PNorm = 153.4528, GNorm = 0.0779, lr_0 = 1.8452e-04
Loss = 2.4784e-03, PNorm = 153.4546, GNorm = 0.2439, lr_0 = 1.8439e-04
Loss = 2.3174e-03, PNorm = 153.4552, GNorm = 0.2025, lr_0 = 1.8427e-04
Loss = 1.4450e-03, PNorm = 153.4582, GNorm = 0.1736, lr_0 = 1.8414e-04
Loss = 1.2326e-03, PNorm = 153.4618, GNorm = 0.0698, lr_0 = 1.8401e-04
Loss = 1.3997e-03, PNorm = 153.4657, GNorm = 0.0560, lr_0 = 1.8389e-04
Loss = 2.8659e-03, PNorm = 153.4674, GNorm = 0.1254, lr_0 = 1.8376e-04
Loss = 1.6166e-03, PNorm = 153.4714, GNorm = 0.1428, lr_0 = 1.8364e-04
Loss = 1.2998e-03, PNorm = 153.4765, GNorm = 0.1746, lr_0 = 1.8351e-04
Loss = 1.4382e-03, PNorm = 153.4796, GNorm = 0.0836, lr_0 = 1.8338e-04
Loss = 3.0374e-03, PNorm = 153.4818, GNorm = 0.1455, lr_0 = 1.8326e-04
Loss = 2.2827e-03, PNorm = 153.4834, GNorm = 0.0418, lr_0 = 1.8313e-04
Loss = 1.7654e-03, PNorm = 153.4867, GNorm = 0.1858, lr_0 = 1.8301e-04
Loss = 1.2234e-03, PNorm = 153.4898, GNorm = 0.1409, lr_0 = 1.8288e-04
Loss = 1.3926e-03, PNorm = 153.4935, GNorm = 0.1107, lr_0 = 1.8276e-04
Loss = 2.8111e-03, PNorm = 153.4991, GNorm = 0.0837, lr_0 = 1.8263e-04
Loss = 1.6108e-03, PNorm = 153.5022, GNorm = 0.1479, lr_0 = 1.8251e-04
Loss = 1.5083e-03, PNorm = 153.5046, GNorm = 0.2459, lr_0 = 1.8238e-04
Loss = 2.5840e-03, PNorm = 153.5106, GNorm = 0.2022, lr_0 = 1.8226e-04
Loss = 2.4676e-03, PNorm = 153.5137, GNorm = 0.2570, lr_0 = 1.8213e-04
Loss = 2.1754e-03, PNorm = 153.5148, GNorm = 0.0560, lr_0 = 1.8201e-04
Loss = 1.7131e-03, PNorm = 153.5165, GNorm = 0.1856, lr_0 = 1.8188e-04
Loss = 1.9849e-03, PNorm = 153.5211, GNorm = 0.3686, lr_0 = 1.8176e-04
Loss = 2.5620e-03, PNorm = 153.5247, GNorm = 0.2805, lr_0 = 1.8163e-04
Loss = 1.5296e-03, PNorm = 153.5281, GNorm = 0.1480, lr_0 = 1.8151e-04
Loss = 1.6450e-03, PNorm = 153.5310, GNorm = 0.1379, lr_0 = 1.8138e-04
Loss = 2.0803e-03, PNorm = 153.5334, GNorm = 0.0974, lr_0 = 1.8126e-04
Loss = 2.2390e-03, PNorm = 153.5370, GNorm = 0.1091, lr_0 = 1.8114e-04
Loss = 1.2692e-03, PNorm = 153.5416, GNorm = 0.1450, lr_0 = 1.8101e-04
Loss = 1.8046e-03, PNorm = 153.5441, GNorm = 0.0705, lr_0 = 1.8089e-04
Loss = 2.5460e-03, PNorm = 153.5479, GNorm = 0.1030, lr_0 = 1.8076e-04
Loss = 1.8466e-03, PNorm = 153.5505, GNorm = 0.1551, lr_0 = 1.8064e-04
Loss = 1.9409e-03, PNorm = 153.5550, GNorm = 0.1067, lr_0 = 1.8052e-04
Loss = 2.1491e-03, PNorm = 153.5604, GNorm = 0.1815, lr_0 = 1.8039e-04
Loss = 1.9611e-03, PNorm = 153.5665, GNorm = 0.0575, lr_0 = 1.8027e-04
Loss = 1.4870e-03, PNorm = 153.5739, GNorm = 0.0760, lr_0 = 1.8015e-04
Loss = 1.4000e-03, PNorm = 153.5769, GNorm = 0.0344, lr_0 = 1.8002e-04
Loss = 2.5928e-03, PNorm = 153.5795, GNorm = 0.1309, lr_0 = 1.7990e-04
Loss = 1.6249e-03, PNorm = 153.5807, GNorm = 0.1117, lr_0 = 1.7978e-04
Loss = 1.8884e-03, PNorm = 153.5845, GNorm = 0.0676, lr_0 = 1.7965e-04
Loss = 1.3428e-03, PNorm = 153.5875, GNorm = 0.1395, lr_0 = 1.7953e-04
Loss = 1.8147e-03, PNorm = 153.5901, GNorm = 0.0574, lr_0 = 1.7941e-04
Loss = 1.6608e-03, PNorm = 153.5935, GNorm = 0.0512, lr_0 = 1.7928e-04
Loss = 3.2312e-03, PNorm = 153.5953, GNorm = 0.2401, lr_0 = 1.7916e-04
Loss = 1.5598e-03, PNorm = 153.5970, GNorm = 0.0666, lr_0 = 1.7904e-04
Loss = 4.9222e-03, PNorm = 153.5974, GNorm = 0.2251, lr_0 = 1.7892e-04
Loss = 2.2931e-03, PNorm = 153.6018, GNorm = 0.3204, lr_0 = 1.7879e-04
Loss = 2.4760e-03, PNorm = 153.6064, GNorm = 0.1935, lr_0 = 1.7867e-04
Loss = 1.7606e-03, PNorm = 153.6116, GNorm = 0.1332, lr_0 = 1.7855e-04
Loss = 2.9878e-03, PNorm = 153.6180, GNorm = 0.1384, lr_0 = 1.7843e-04
Loss = 2.2367e-03, PNorm = 153.6225, GNorm = 0.0759, lr_0 = 1.7830e-04
Loss = 5.4759e-03, PNorm = 153.6270, GNorm = 0.0421, lr_0 = 1.7818e-04
Loss = 1.3403e-03, PNorm = 153.6299, GNorm = 0.0361, lr_0 = 1.7806e-04
Loss = 1.5984e-03, PNorm = 153.6325, GNorm = 0.0612, lr_0 = 1.7794e-04
Loss = 1.6447e-03, PNorm = 153.6339, GNorm = 0.1278, lr_0 = 1.7782e-04
Validation mae = 0.474869
Epoch 23
Loss = 2.1479e-03, PNorm = 153.6331, GNorm = 0.1183, lr_0 = 1.7769e-04
Loss = 2.3117e-03, PNorm = 153.6353, GNorm = 0.0764, lr_0 = 1.7757e-04
Loss = 1.3535e-03, PNorm = 153.6369, GNorm = 0.2370, lr_0 = 1.7745e-04
Loss = 1.5485e-03, PNorm = 153.6380, GNorm = 0.0830, lr_0 = 1.7733e-04
Loss = 1.5624e-03, PNorm = 153.6416, GNorm = 0.0476, lr_0 = 1.7721e-04
Loss = 2.2940e-03, PNorm = 153.6452, GNorm = 0.0372, lr_0 = 1.7709e-04
Loss = 1.0341e-03, PNorm = 153.6484, GNorm = 0.1177, lr_0 = 1.7696e-04
Loss = 1.2944e-03, PNorm = 153.6499, GNorm = 0.1848, lr_0 = 1.7684e-04
Loss = 2.8014e-03, PNorm = 153.6538, GNorm = 0.1120, lr_0 = 1.7672e-04
Loss = 1.2190e-03, PNorm = 153.6556, GNorm = 0.1486, lr_0 = 1.7660e-04
Loss = 2.8430e-03, PNorm = 153.6578, GNorm = 0.1164, lr_0 = 1.7648e-04
Loss = 1.3823e-03, PNorm = 153.6599, GNorm = 0.0506, lr_0 = 1.7636e-04
Loss = 1.2888e-03, PNorm = 153.6649, GNorm = 0.1212, lr_0 = 1.7624e-04
Loss = 1.4425e-03, PNorm = 153.6690, GNorm = 0.0837, lr_0 = 1.7612e-04
Loss = 1.6277e-03, PNorm = 153.6704, GNorm = 0.1535, lr_0 = 1.7600e-04
Loss = 1.5305e-03, PNorm = 153.6724, GNorm = 0.0722, lr_0 = 1.7588e-04
Loss = 3.0491e-03, PNorm = 153.6766, GNorm = 0.1574, lr_0 = 1.7576e-04
Loss = 2.0742e-03, PNorm = 153.6782, GNorm = 0.1363, lr_0 = 1.7564e-04
Loss = 1.6547e-03, PNorm = 153.6771, GNorm = 0.1051, lr_0 = 1.7552e-04
Loss = 1.3064e-03, PNorm = 153.6808, GNorm = 0.1687, lr_0 = 1.7540e-04
Loss = 1.0284e-03, PNorm = 153.6829, GNorm = 0.0870, lr_0 = 1.7528e-04
Loss = 1.1715e-03, PNorm = 153.6858, GNorm = 0.0630, lr_0 = 1.7516e-04
Loss = 1.4515e-03, PNorm = 153.6891, GNorm = 0.0836, lr_0 = 1.7504e-04
Loss = 1.4873e-03, PNorm = 153.6931, GNorm = 0.0941, lr_0 = 1.7492e-04
Loss = 2.3389e-03, PNorm = 153.6946, GNorm = 0.3062, lr_0 = 1.7480e-04
Loss = 1.4033e-03, PNorm = 153.6967, GNorm = 0.1649, lr_0 = 1.7468e-04
Loss = 5.3480e-03, PNorm = 153.7008, GNorm = 0.1080, lr_0 = 1.7456e-04
Loss = 1.3484e-03, PNorm = 153.7029, GNorm = 0.2907, lr_0 = 1.7444e-04
Loss = 3.0628e-03, PNorm = 153.7077, GNorm = 0.0949, lr_0 = 1.7432e-04
Loss = 1.4217e-03, PNorm = 153.7096, GNorm = 0.2226, lr_0 = 1.7420e-04
Loss = 2.1559e-03, PNorm = 153.7119, GNorm = 0.0415, lr_0 = 1.7408e-04
Loss = 1.1959e-03, PNorm = 153.7161, GNorm = 0.1319, lr_0 = 1.7396e-04
Loss = 2.0831e-03, PNorm = 153.7204, GNorm = 0.0936, lr_0 = 1.7384e-04
Loss = 2.1642e-03, PNorm = 153.7218, GNorm = 0.0551, lr_0 = 1.7372e-04
Loss = 2.2918e-03, PNorm = 153.7251, GNorm = 0.0973, lr_0 = 1.7360e-04
Loss = 2.5314e-03, PNorm = 153.7271, GNorm = 0.1170, lr_0 = 1.7348e-04
Loss = 3.2371e-03, PNorm = 153.7278, GNorm = 0.0652, lr_0 = 1.7336e-04
Loss = 1.2260e-03, PNorm = 153.7313, GNorm = 0.1008, lr_0 = 1.7325e-04
Loss = 1.7069e-03, PNorm = 153.7351, GNorm = 0.1234, lr_0 = 1.7313e-04
Loss = 2.1706e-03, PNorm = 153.7386, GNorm = 0.2033, lr_0 = 1.7301e-04
Loss = 1.6175e-03, PNorm = 153.7423, GNorm = 0.1485, lr_0 = 1.7289e-04
Loss = 1.8739e-03, PNorm = 153.7446, GNorm = 0.2072, lr_0 = 1.7277e-04
Loss = 1.4647e-03, PNorm = 153.7468, GNorm = 0.1148, lr_0 = 1.7265e-04
Loss = 1.1789e-03, PNorm = 153.7497, GNorm = 0.1857, lr_0 = 1.7253e-04
Loss = 1.3061e-03, PNorm = 153.7525, GNorm = 0.0782, lr_0 = 1.7242e-04
Loss = 1.5328e-03, PNorm = 153.7562, GNorm = 0.1403, lr_0 = 1.7230e-04
Loss = 2.0011e-03, PNorm = 153.7592, GNorm = 0.1457, lr_0 = 1.7218e-04
Loss = 1.0268e-03, PNorm = 153.7624, GNorm = 0.1888, lr_0 = 1.7206e-04
Loss = 2.3555e-03, PNorm = 153.7664, GNorm = 0.1608, lr_0 = 1.7194e-04
Loss = 1.1092e-03, PNorm = 153.7708, GNorm = 0.0686, lr_0 = 1.7183e-04
Loss = 2.4808e-03, PNorm = 153.7714, GNorm = 0.2159, lr_0 = 1.7171e-04
Loss = 1.8804e-03, PNorm = 153.7752, GNorm = 0.1453, lr_0 = 1.7159e-04
Loss = 1.2209e-03, PNorm = 153.7786, GNorm = 0.1992, lr_0 = 1.7147e-04
Loss = 2.0297e-03, PNorm = 153.7828, GNorm = 0.0910, lr_0 = 1.7136e-04
Loss = 1.5088e-03, PNorm = 153.7835, GNorm = 0.2305, lr_0 = 1.7124e-04
Loss = 1.6616e-03, PNorm = 153.7868, GNorm = 0.1353, lr_0 = 1.7112e-04
Loss = 1.2561e-03, PNorm = 153.7905, GNorm = 0.1441, lr_0 = 1.7100e-04
Loss = 1.2781e-03, PNorm = 153.7940, GNorm = 0.0559, lr_0 = 1.7089e-04
Loss = 1.0918e-03, PNorm = 153.7968, GNorm = 0.0617, lr_0 = 1.7077e-04
Loss = 1.4563e-03, PNorm = 153.7977, GNorm = 0.0660, lr_0 = 1.7065e-04
Loss = 2.3397e-03, PNorm = 153.8012, GNorm = 0.1523, lr_0 = 1.7054e-04
Loss = 2.2887e-03, PNorm = 153.8030, GNorm = 0.1094, lr_0 = 1.7042e-04
Loss = 5.3184e-03, PNorm = 153.8063, GNorm = 0.2203, lr_0 = 1.7030e-04
Loss = 1.0909e-03, PNorm = 153.8074, GNorm = 0.0619, lr_0 = 1.7019e-04
Loss = 3.7932e-03, PNorm = 153.8095, GNorm = 0.1610, lr_0 = 1.7007e-04
Loss = 1.3633e-03, PNorm = 153.8143, GNorm = 0.2506, lr_0 = 1.6995e-04
Loss = 1.5380e-03, PNorm = 153.8206, GNorm = 0.1033, lr_0 = 1.6984e-04
Loss = 3.1979e-03, PNorm = 153.8235, GNorm = 0.1200, lr_0 = 1.6972e-04
Loss = 2.0151e-03, PNorm = 153.8270, GNorm = 0.0646, lr_0 = 1.6960e-04
Loss = 1.5884e-03, PNorm = 153.8313, GNorm = 0.1602, lr_0 = 1.6949e-04
Loss = 2.6794e-03, PNorm = 153.8364, GNorm = 0.1628, lr_0 = 1.6937e-04
Loss = 1.4323e-03, PNorm = 153.8394, GNorm = 0.0983, lr_0 = 1.6926e-04
Loss = 3.2939e-03, PNorm = 153.8412, GNorm = 0.1262, lr_0 = 1.6914e-04
Loss = 2.0109e-03, PNorm = 153.8445, GNorm = 0.1111, lr_0 = 1.6902e-04
Loss = 1.2132e-03, PNorm = 153.8474, GNorm = 0.0543, lr_0 = 1.6891e-04
Loss = 3.0203e-03, PNorm = 153.8504, GNorm = 0.1900, lr_0 = 1.6879e-04
Loss = 2.4243e-03, PNorm = 153.8519, GNorm = 0.0692, lr_0 = 1.6868e-04
Loss = 1.1294e-03, PNorm = 153.8557, GNorm = 0.0618, lr_0 = 1.6856e-04
Loss = 1.3521e-03, PNorm = 153.8595, GNorm = 0.0472, lr_0 = 1.6845e-04
Loss = 1.1261e-03, PNorm = 153.8618, GNorm = 0.0938, lr_0 = 1.6833e-04
Loss = 2.8022e-03, PNorm = 153.8633, GNorm = 0.1060, lr_0 = 1.6821e-04
Loss = 1.2602e-03, PNorm = 153.8644, GNorm = 0.2229, lr_0 = 1.6810e-04
Loss = 1.1979e-03, PNorm = 153.8677, GNorm = 0.1470, lr_0 = 1.6798e-04
Loss = 1.1204e-03, PNorm = 153.8704, GNorm = 0.0430, lr_0 = 1.6787e-04
Loss = 1.5856e-03, PNorm = 153.8741, GNorm = 0.3455, lr_0 = 1.6775e-04
Loss = 1.1122e-03, PNorm = 153.8740, GNorm = 0.0436, lr_0 = 1.6764e-04
Loss = 1.4281e-03, PNorm = 153.8772, GNorm = 0.1947, lr_0 = 1.6752e-04
Loss = 1.9594e-03, PNorm = 153.8789, GNorm = 0.1792, lr_0 = 1.6741e-04
Loss = 1.9353e-03, PNorm = 153.8843, GNorm = 0.0649, lr_0 = 1.6729e-04
Loss = 1.4996e-03, PNorm = 153.8874, GNorm = 0.1098, lr_0 = 1.6718e-04
Loss = 4.0241e-03, PNorm = 153.8896, GNorm = 0.1391, lr_0 = 1.6707e-04
Loss = 1.5164e-03, PNorm = 153.8956, GNorm = 0.0500, lr_0 = 1.6695e-04
Loss = 2.1517e-03, PNorm = 153.9006, GNorm = 0.2224, lr_0 = 1.6684e-04
Loss = 2.8143e-03, PNorm = 153.9061, GNorm = 0.1558, lr_0 = 1.6672e-04
Loss = 1.0550e-03, PNorm = 153.9096, GNorm = 0.1358, lr_0 = 1.6661e-04
Loss = 1.6716e-03, PNorm = 153.9110, GNorm = 0.4849, lr_0 = 1.6649e-04
Loss = 2.6179e-03, PNorm = 153.9142, GNorm = 0.0452, lr_0 = 1.6638e-04
Loss = 1.5205e-03, PNorm = 153.9168, GNorm = 0.1467, lr_0 = 1.6627e-04
Loss = 2.3110e-03, PNorm = 153.9192, GNorm = 0.1604, lr_0 = 1.6615e-04
Loss = 1.4742e-03, PNorm = 153.9234, GNorm = 0.0924, lr_0 = 1.6604e-04
Loss = 1.8741e-03, PNorm = 153.9262, GNorm = 0.0696, lr_0 = 1.6592e-04
Loss = 2.4325e-03, PNorm = 153.9300, GNorm = 0.0991, lr_0 = 1.6581e-04
Loss = 1.8792e-03, PNorm = 153.9336, GNorm = 0.1242, lr_0 = 1.6570e-04
Loss = 2.3468e-03, PNorm = 153.9373, GNorm = 0.0886, lr_0 = 1.6558e-04
Loss = 1.3321e-03, PNorm = 153.9403, GNorm = 0.3157, lr_0 = 1.6547e-04
Loss = 1.2695e-03, PNorm = 153.9414, GNorm = 0.0426, lr_0 = 1.6536e-04
Loss = 3.0681e-03, PNorm = 153.9444, GNorm = 0.0525, lr_0 = 1.6524e-04
Loss = 1.1010e-03, PNorm = 153.9473, GNorm = 0.0570, lr_0 = 1.6513e-04
Loss = 1.1890e-03, PNorm = 153.9478, GNorm = 0.1389, lr_0 = 1.6502e-04
Loss = 1.1422e-03, PNorm = 153.9488, GNorm = 0.0367, lr_0 = 1.6490e-04
Loss = 2.5785e-03, PNorm = 153.9530, GNorm = 0.2864, lr_0 = 1.6479e-04
Loss = 1.2907e-03, PNorm = 153.9568, GNorm = 0.1524, lr_0 = 1.6468e-04
Loss = 1.4343e-03, PNorm = 153.9629, GNorm = 0.0822, lr_0 = 1.6457e-04
Loss = 1.8599e-03, PNorm = 153.9647, GNorm = 0.0732, lr_0 = 1.6445e-04
Loss = 2.0828e-03, PNorm = 153.9662, GNorm = 0.3035, lr_0 = 1.6434e-04
Loss = 1.5140e-03, PNorm = 153.9668, GNorm = 0.0563, lr_0 = 1.6423e-04
Loss = 2.3421e-03, PNorm = 153.9694, GNorm = 0.2936, lr_0 = 1.6412e-04
Loss = 1.2763e-03, PNorm = 153.9723, GNorm = 0.0905, lr_0 = 1.6400e-04
Loss = 2.1868e-03, PNorm = 153.9748, GNorm = 0.0521, lr_0 = 1.6389e-04
Loss = 1.4452e-03, PNorm = 153.9780, GNorm = 0.1466, lr_0 = 1.6378e-04
Validation mae = 0.474287
Epoch 24
Loss = 1.0267e-03, PNorm = 153.9797, GNorm = 0.1273, lr_0 = 1.6367e-04
Loss = 1.2615e-03, PNorm = 153.9812, GNorm = 0.1163, lr_0 = 1.6355e-04
Loss = 1.7134e-03, PNorm = 153.9831, GNorm = 0.0404, lr_0 = 1.6344e-04
Loss = 1.0665e-03, PNorm = 153.9864, GNorm = 0.0858, lr_0 = 1.6333e-04
Loss = 1.6587e-03, PNorm = 153.9902, GNorm = 0.2325, lr_0 = 1.6322e-04
Loss = 1.3371e-03, PNorm = 153.9914, GNorm = 0.1298, lr_0 = 1.6311e-04
Loss = 1.4138e-03, PNorm = 153.9926, GNorm = 0.0992, lr_0 = 1.6299e-04
Loss = 1.3343e-03, PNorm = 153.9944, GNorm = 0.0497, lr_0 = 1.6288e-04
Loss = 1.1507e-03, PNorm = 153.9974, GNorm = 0.1045, lr_0 = 1.6277e-04
Loss = 1.5738e-03, PNorm = 154.0011, GNorm = 0.1045, lr_0 = 1.6266e-04
Loss = 1.1808e-03, PNorm = 154.0038, GNorm = 0.2154, lr_0 = 1.6255e-04
Loss = 1.0624e-03, PNorm = 154.0051, GNorm = 0.0559, lr_0 = 1.6244e-04
Loss = 1.6132e-03, PNorm = 154.0072, GNorm = 0.1342, lr_0 = 1.6233e-04
Loss = 1.9927e-03, PNorm = 154.0079, GNorm = 0.1178, lr_0 = 1.6221e-04
Loss = 9.4694e-04, PNorm = 154.0107, GNorm = 0.0811, lr_0 = 1.6210e-04
Loss = 1.7561e-03, PNorm = 154.0132, GNorm = 0.0995, lr_0 = 1.6199e-04
Loss = 1.5704e-03, PNorm = 154.0164, GNorm = 0.0878, lr_0 = 1.6188e-04
Loss = 3.0273e-03, PNorm = 154.0189, GNorm = 0.0940, lr_0 = 1.6177e-04
Loss = 2.8957e-03, PNorm = 154.0218, GNorm = 0.3938, lr_0 = 1.6166e-04
Loss = 2.0637e-03, PNorm = 154.0244, GNorm = 0.0615, lr_0 = 1.6155e-04
Loss = 8.4963e-04, PNorm = 154.0258, GNorm = 0.0860, lr_0 = 1.6144e-04
Loss = 2.6985e-03, PNorm = 154.0291, GNorm = 0.2496, lr_0 = 1.6133e-04
Loss = 1.1622e-03, PNorm = 154.0305, GNorm = 0.1444, lr_0 = 1.6122e-04
Loss = 1.0275e-03, PNorm = 154.0314, GNorm = 0.1081, lr_0 = 1.6111e-04
Loss = 1.0811e-03, PNorm = 154.0343, GNorm = 0.1816, lr_0 = 1.6100e-04
Loss = 1.4048e-03, PNorm = 154.0369, GNorm = 0.1066, lr_0 = 1.6089e-04
Loss = 1.9829e-03, PNorm = 154.0392, GNorm = 0.0491, lr_0 = 1.6078e-04
Loss = 9.3072e-04, PNorm = 154.0418, GNorm = 0.0717, lr_0 = 1.6067e-04
Loss = 1.0189e-03, PNorm = 154.0433, GNorm = 0.0590, lr_0 = 1.6056e-04
Loss = 1.7695e-03, PNorm = 154.0458, GNorm = 0.3102, lr_0 = 1.6045e-04
Loss = 2.1398e-03, PNorm = 154.0492, GNorm = 0.0452, lr_0 = 1.6034e-04
Loss = 1.0186e-03, PNorm = 154.0529, GNorm = 0.0855, lr_0 = 1.6023e-04
Loss = 1.6026e-03, PNorm = 154.0544, GNorm = 0.0546, lr_0 = 1.6012e-04
Loss = 1.2148e-03, PNorm = 154.0567, GNorm = 0.0676, lr_0 = 1.6001e-04
Loss = 1.6729e-03, PNorm = 154.0573, GNorm = 0.1841, lr_0 = 1.5990e-04
Loss = 2.1967e-03, PNorm = 154.0605, GNorm = 0.2059, lr_0 = 1.5979e-04
Loss = 1.0780e-03, PNorm = 154.0636, GNorm = 0.1933, lr_0 = 1.5968e-04
Loss = 1.0399e-03, PNorm = 154.0674, GNorm = 0.0381, lr_0 = 1.5957e-04
Loss = 1.0788e-03, PNorm = 154.0714, GNorm = 0.0694, lr_0 = 1.5946e-04
Loss = 9.5307e-04, PNorm = 154.0752, GNorm = 0.0359, lr_0 = 1.5935e-04
Loss = 1.7943e-03, PNorm = 154.0767, GNorm = 0.1065, lr_0 = 1.5924e-04
Loss = 1.4318e-03, PNorm = 154.0787, GNorm = 0.1821, lr_0 = 1.5913e-04
Loss = 1.3911e-03, PNorm = 154.0807, GNorm = 0.0923, lr_0 = 1.5902e-04
Loss = 9.2204e-04, PNorm = 154.0827, GNorm = 0.0422, lr_0 = 1.5891e-04
Loss = 1.2676e-03, PNorm = 154.0847, GNorm = 0.0645, lr_0 = 1.5880e-04
Loss = 1.4109e-03, PNorm = 154.0854, GNorm = 0.0567, lr_0 = 1.5870e-04
Loss = 1.6383e-03, PNorm = 154.0861, GNorm = 0.0768, lr_0 = 1.5859e-04
Loss = 1.5499e-03, PNorm = 154.0872, GNorm = 0.1133, lr_0 = 1.5848e-04
Loss = 1.4952e-03, PNorm = 154.0907, GNorm = 0.1965, lr_0 = 1.5837e-04
Loss = 2.9326e-03, PNorm = 154.0943, GNorm = 0.1687, lr_0 = 1.5826e-04
Loss = 1.0519e-03, PNorm = 154.0966, GNorm = 0.0341, lr_0 = 1.5815e-04
Loss = 1.1804e-03, PNorm = 154.0996, GNorm = 0.0310, lr_0 = 1.5804e-04
Loss = 1.2211e-03, PNorm = 154.1027, GNorm = 0.1105, lr_0 = 1.5794e-04
Loss = 1.9570e-03, PNorm = 154.1043, GNorm = 0.1641, lr_0 = 1.5783e-04
Loss = 2.2254e-03, PNorm = 154.1050, GNorm = 0.0401, lr_0 = 1.5772e-04
Loss = 2.0229e-03, PNorm = 154.1076, GNorm = 0.1736, lr_0 = 1.5761e-04
Loss = 1.5347e-03, PNorm = 154.1111, GNorm = 0.1960, lr_0 = 1.5750e-04
Loss = 3.0314e-03, PNorm = 154.1132, GNorm = 0.1331, lr_0 = 1.5740e-04
Loss = 1.6567e-03, PNorm = 154.1159, GNorm = 0.0815, lr_0 = 1.5729e-04
Loss = 1.6332e-03, PNorm = 154.1180, GNorm = 0.3338, lr_0 = 1.5718e-04
Loss = 2.1222e-03, PNorm = 154.1200, GNorm = 0.0778, lr_0 = 1.5707e-04
Loss = 1.4436e-03, PNorm = 154.1239, GNorm = 0.1342, lr_0 = 1.5697e-04
Loss = 1.0065e-03, PNorm = 154.1257, GNorm = 0.1162, lr_0 = 1.5686e-04
Loss = 1.1604e-03, PNorm = 154.1286, GNorm = 0.0334, lr_0 = 1.5675e-04
Loss = 2.5731e-03, PNorm = 154.1297, GNorm = 0.0864, lr_0 = 1.5664e-04
Loss = 3.3077e-03, PNorm = 154.1307, GNorm = 0.1231, lr_0 = 1.5654e-04
Loss = 1.0258e-03, PNorm = 154.1351, GNorm = 0.0411, lr_0 = 1.5643e-04
Loss = 9.0561e-04, PNorm = 154.1379, GNorm = 0.1547, lr_0 = 1.5632e-04
Loss = 9.9391e-04, PNorm = 154.1408, GNorm = 0.1356, lr_0 = 1.5621e-04
Loss = 1.1948e-03, PNorm = 154.1443, GNorm = 0.1377, lr_0 = 1.5611e-04
Loss = 3.1109e-03, PNorm = 154.1472, GNorm = 0.0650, lr_0 = 1.5600e-04
Loss = 9.9220e-04, PNorm = 154.1497, GNorm = 0.1375, lr_0 = 1.5589e-04
Loss = 3.1204e-03, PNorm = 154.1533, GNorm = 0.0531, lr_0 = 1.5579e-04
Loss = 1.5740e-03, PNorm = 154.1560, GNorm = 0.0311, lr_0 = 1.5568e-04
Loss = 1.1529e-03, PNorm = 154.1588, GNorm = 0.0688, lr_0 = 1.5557e-04
Loss = 1.6628e-03, PNorm = 154.1614, GNorm = 0.0805, lr_0 = 1.5547e-04
Loss = 1.6928e-03, PNorm = 154.1643, GNorm = 0.0844, lr_0 = 1.5536e-04
Loss = 2.3834e-03, PNorm = 154.1673, GNorm = 0.0823, lr_0 = 1.5525e-04
Loss = 2.0473e-03, PNorm = 154.1695, GNorm = 0.0701, lr_0 = 1.5515e-04
Loss = 1.5609e-03, PNorm = 154.1734, GNorm = 0.0437, lr_0 = 1.5504e-04
Loss = 1.3987e-03, PNorm = 154.1760, GNorm = 0.0687, lr_0 = 1.5493e-04
Loss = 1.5132e-03, PNorm = 154.1796, GNorm = 0.0866, lr_0 = 1.5483e-04
Loss = 3.9502e-03, PNorm = 154.1820, GNorm = 0.0484, lr_0 = 1.5472e-04
Loss = 2.2004e-03, PNorm = 154.1861, GNorm = 0.2015, lr_0 = 1.5462e-04
Loss = 1.0157e-03, PNorm = 154.1879, GNorm = 0.2448, lr_0 = 1.5451e-04
Loss = 1.1996e-03, PNorm = 154.1904, GNorm = 0.0852, lr_0 = 1.5440e-04
Loss = 1.0945e-03, PNorm = 154.1936, GNorm = 0.1056, lr_0 = 1.5430e-04
Loss = 1.1640e-03, PNorm = 154.1964, GNorm = 0.1680, lr_0 = 1.5419e-04
Loss = 1.0292e-03, PNorm = 154.1978, GNorm = 0.0739, lr_0 = 1.5409e-04
Loss = 2.1397e-03, PNorm = 154.1996, GNorm = 0.1146, lr_0 = 1.5398e-04
Loss = 8.9054e-04, PNorm = 154.2023, GNorm = 0.2034, lr_0 = 1.5388e-04
Loss = 1.2329e-03, PNorm = 154.2038, GNorm = 0.0873, lr_0 = 1.5377e-04
Loss = 1.2074e-03, PNorm = 154.2065, GNorm = 0.0401, lr_0 = 1.5367e-04
Loss = 1.5311e-03, PNorm = 154.2093, GNorm = 0.0813, lr_0 = 1.5356e-04
Loss = 1.3091e-03, PNorm = 154.2119, GNorm = 0.2028, lr_0 = 1.5346e-04
Loss = 1.5044e-03, PNorm = 154.2132, GNorm = 0.1037, lr_0 = 1.5335e-04
Loss = 2.4395e-03, PNorm = 154.2144, GNorm = 0.0797, lr_0 = 1.5325e-04
Loss = 2.9086e-03, PNorm = 154.2171, GNorm = 0.2289, lr_0 = 1.5314e-04
Loss = 1.1202e-03, PNorm = 154.2195, GNorm = 0.1399, lr_0 = 1.5304e-04
Loss = 1.1301e-03, PNorm = 154.2212, GNorm = 0.1159, lr_0 = 1.5293e-04
Loss = 3.8077e-03, PNorm = 154.2242, GNorm = 0.1391, lr_0 = 1.5283e-04
Loss = 8.8965e-04, PNorm = 154.2270, GNorm = 0.0699, lr_0 = 1.5272e-04
Loss = 2.5099e-03, PNorm = 154.2301, GNorm = 0.1488, lr_0 = 1.5262e-04
Loss = 2.7569e-03, PNorm = 154.2317, GNorm = 0.0966, lr_0 = 1.5251e-04
Loss = 1.6241e-03, PNorm = 154.2318, GNorm = 0.1792, lr_0 = 1.5241e-04
Loss = 1.0740e-03, PNorm = 154.2334, GNorm = 0.1022, lr_0 = 1.5230e-04
Loss = 3.9022e-03, PNorm = 154.2341, GNorm = 0.2287, lr_0 = 1.5220e-04
Loss = 2.0586e-03, PNorm = 154.2376, GNorm = 0.0897, lr_0 = 1.5209e-04
Loss = 4.2071e-03, PNorm = 154.2412, GNorm = 0.0998, lr_0 = 1.5199e-04
Loss = 3.1222e-03, PNorm = 154.2447, GNorm = 0.1338, lr_0 = 1.5189e-04
Loss = 1.8621e-03, PNorm = 154.2469, GNorm = 0.2178, lr_0 = 1.5178e-04
Loss = 1.1492e-03, PNorm = 154.2513, GNorm = 0.1697, lr_0 = 1.5168e-04
Loss = 1.3163e-03, PNorm = 154.2543, GNorm = 0.0709, lr_0 = 1.5157e-04
Loss = 1.4278e-03, PNorm = 154.2563, GNorm = 0.1009, lr_0 = 1.5147e-04
Loss = 2.8436e-03, PNorm = 154.2585, GNorm = 0.3870, lr_0 = 1.5137e-04
Loss = 1.3386e-03, PNorm = 154.2618, GNorm = 0.1179, lr_0 = 1.5126e-04
Loss = 1.5036e-03, PNorm = 154.2640, GNorm = 0.2637, lr_0 = 1.5116e-04
Loss = 2.0169e-03, PNorm = 154.2669, GNorm = 0.3028, lr_0 = 1.5106e-04
Loss = 1.7129e-03, PNorm = 154.2714, GNorm = 0.0413, lr_0 = 1.5095e-04
Loss = 1.4301e-03, PNorm = 154.2736, GNorm = 0.0815, lr_0 = 1.5085e-04
Validation mae = 0.474928
Epoch 25
Loss = 1.2102e-03, PNorm = 154.2749, GNorm = 0.1143, lr_0 = 1.5075e-04
Loss = 1.2359e-03, PNorm = 154.2760, GNorm = 0.0382, lr_0 = 1.5064e-04
Loss = 8.8141e-04, PNorm = 154.2788, GNorm = 0.0816, lr_0 = 1.5054e-04
Loss = 1.1443e-03, PNorm = 154.2809, GNorm = 0.1651, lr_0 = 1.5044e-04
Loss = 1.0774e-03, PNorm = 154.2819, GNorm = 0.0815, lr_0 = 1.5033e-04
Loss = 1.1072e-03, PNorm = 154.2828, GNorm = 0.0589, lr_0 = 1.5023e-04
Loss = 9.5031e-04, PNorm = 154.2847, GNorm = 0.1162, lr_0 = 1.5013e-04
Loss = 9.9163e-04, PNorm = 154.2875, GNorm = 0.0718, lr_0 = 1.5002e-04
Loss = 1.7265e-03, PNorm = 154.2882, GNorm = 0.1438, lr_0 = 1.4992e-04
Loss = 1.0884e-03, PNorm = 154.2900, GNorm = 0.0517, lr_0 = 1.4982e-04
Loss = 2.0314e-03, PNorm = 154.2896, GNorm = 0.1041, lr_0 = 1.4972e-04
Loss = 8.9994e-04, PNorm = 154.2909, GNorm = 0.1833, lr_0 = 1.4961e-04
Loss = 1.3770e-03, PNorm = 154.2915, GNorm = 0.1178, lr_0 = 1.4951e-04
Loss = 1.1556e-03, PNorm = 154.2931, GNorm = 0.0359, lr_0 = 1.4941e-04
Loss = 9.4843e-04, PNorm = 154.2938, GNorm = 0.0387, lr_0 = 1.4931e-04
Loss = 1.1352e-03, PNorm = 154.2959, GNorm = 0.1015, lr_0 = 1.4920e-04
Loss = 9.7896e-04, PNorm = 154.2974, GNorm = 0.0738, lr_0 = 1.4910e-04
Loss = 1.1078e-03, PNorm = 154.3000, GNorm = 0.0491, lr_0 = 1.4900e-04
Loss = 1.6385e-03, PNorm = 154.3025, GNorm = 0.0849, lr_0 = 1.4890e-04
Loss = 1.1121e-03, PNorm = 154.3040, GNorm = 0.1039, lr_0 = 1.4880e-04
Loss = 1.5609e-03, PNorm = 154.3048, GNorm = 0.1309, lr_0 = 1.4869e-04
Loss = 8.0725e-04, PNorm = 154.3056, GNorm = 0.1795, lr_0 = 1.4859e-04
Loss = 3.2844e-03, PNorm = 154.3068, GNorm = 0.1098, lr_0 = 1.4849e-04
Loss = 1.2594e-03, PNorm = 154.3076, GNorm = 0.1599, lr_0 = 1.4839e-04
Loss = 9.2933e-04, PNorm = 154.3091, GNorm = 0.0849, lr_0 = 1.4829e-04
Loss = 1.7056e-03, PNorm = 154.3107, GNorm = 0.1224, lr_0 = 1.4818e-04
Loss = 7.8744e-04, PNorm = 154.3125, GNorm = 0.1417, lr_0 = 1.4808e-04
Loss = 3.7831e-03, PNorm = 154.3137, GNorm = 0.2391, lr_0 = 1.4798e-04
Loss = 1.6412e-03, PNorm = 154.3160, GNorm = 0.0789, lr_0 = 1.4788e-04
Loss = 1.3194e-03, PNorm = 154.3192, GNorm = 0.1119, lr_0 = 1.4778e-04
Loss = 9.5727e-04, PNorm = 154.3203, GNorm = 0.0996, lr_0 = 1.4768e-04
Loss = 1.3987e-03, PNorm = 154.3223, GNorm = 0.1371, lr_0 = 1.4758e-04
Loss = 1.4134e-03, PNorm = 154.3251, GNorm = 0.0832, lr_0 = 1.4748e-04
Loss = 1.1434e-03, PNorm = 154.3245, GNorm = 0.1300, lr_0 = 1.4737e-04
Loss = 9.0465e-04, PNorm = 154.3260, GNorm = 0.0614, lr_0 = 1.4727e-04
Loss = 1.5460e-03, PNorm = 154.3275, GNorm = 0.0703, lr_0 = 1.4717e-04
Loss = 9.7398e-04, PNorm = 154.3308, GNorm = 0.0283, lr_0 = 1.4707e-04
Loss = 2.8113e-03, PNorm = 154.3331, GNorm = 0.0859, lr_0 = 1.4697e-04
Loss = 3.6272e-03, PNorm = 154.3348, GNorm = 0.1077, lr_0 = 1.4687e-04
Loss = 8.4375e-04, PNorm = 154.3376, GNorm = 0.0988, lr_0 = 1.4677e-04
Loss = 1.2485e-03, PNorm = 154.3393, GNorm = 0.0750, lr_0 = 1.4667e-04
Loss = 1.8232e-03, PNorm = 154.3407, GNorm = 0.0288, lr_0 = 1.4657e-04
Loss = 1.0781e-03, PNorm = 154.3424, GNorm = 0.0432, lr_0 = 1.4647e-04
Loss = 1.1679e-03, PNorm = 154.3436, GNorm = 0.0606, lr_0 = 1.4637e-04
Loss = 1.6402e-03, PNorm = 154.3434, GNorm = 0.1231, lr_0 = 1.4627e-04
Loss = 1.3577e-03, PNorm = 154.3439, GNorm = 0.0438, lr_0 = 1.4617e-04
Loss = 1.3326e-03, PNorm = 154.3460, GNorm = 0.1878, lr_0 = 1.4607e-04
Loss = 1.0145e-03, PNorm = 154.3489, GNorm = 0.0770, lr_0 = 1.4597e-04
Loss = 8.6449e-04, PNorm = 154.3516, GNorm = 0.0737, lr_0 = 1.4587e-04
Loss = 3.2063e-03, PNorm = 154.3539, GNorm = 0.0471, lr_0 = 1.4577e-04
Loss = 8.2188e-04, PNorm = 154.3549, GNorm = 0.1061, lr_0 = 1.4567e-04
Loss = 1.8563e-03, PNorm = 154.3575, GNorm = 0.1265, lr_0 = 1.4557e-04
Loss = 2.0067e-03, PNorm = 154.3612, GNorm = 0.0289, lr_0 = 1.4547e-04
Loss = 1.4135e-03, PNorm = 154.3634, GNorm = 0.1164, lr_0 = 1.4537e-04
Loss = 2.0633e-03, PNorm = 154.3666, GNorm = 0.3221, lr_0 = 1.4527e-04
Loss = 8.3823e-04, PNorm = 154.3703, GNorm = 0.0607, lr_0 = 1.4517e-04
Loss = 1.6234e-03, PNorm = 154.3718, GNorm = 0.1986, lr_0 = 1.4507e-04
Loss = 9.4697e-04, PNorm = 154.3748, GNorm = 0.1030, lr_0 = 1.4497e-04
Loss = 1.2759e-03, PNorm = 154.3761, GNorm = 0.1814, lr_0 = 1.4487e-04
Loss = 1.5724e-03, PNorm = 154.3776, GNorm = 0.1022, lr_0 = 1.4477e-04
Loss = 1.1276e-03, PNorm = 154.3794, GNorm = 0.1091, lr_0 = 1.4467e-04
Loss = 1.0193e-03, PNorm = 154.3823, GNorm = 0.1055, lr_0 = 1.4457e-04
Loss = 1.4729e-03, PNorm = 154.3874, GNorm = 0.3609, lr_0 = 1.4447e-04
Loss = 1.4414e-03, PNorm = 154.3896, GNorm = 0.1754, lr_0 = 1.4438e-04
Loss = 1.0139e-03, PNorm = 154.3905, GNorm = 0.0319, lr_0 = 1.4428e-04
Loss = 9.2868e-04, PNorm = 154.3923, GNorm = 0.0423, lr_0 = 1.4418e-04
Loss = 8.0223e-04, PNorm = 154.3930, GNorm = 0.1011, lr_0 = 1.4408e-04
Loss = 1.2435e-03, PNorm = 154.3936, GNorm = 0.1103, lr_0 = 1.4398e-04
Loss = 1.5776e-03, PNorm = 154.3933, GNorm = 0.0631, lr_0 = 1.4388e-04
Loss = 9.3034e-04, PNorm = 154.3958, GNorm = 0.0955, lr_0 = 1.4378e-04
Loss = 2.4434e-03, PNorm = 154.3976, GNorm = 0.1561, lr_0 = 1.4368e-04
Loss = 8.6239e-04, PNorm = 154.3988, GNorm = 0.0941, lr_0 = 1.4359e-04
Loss = 1.0986e-03, PNorm = 154.4022, GNorm = 0.0937, lr_0 = 1.4349e-04
Loss = 1.2049e-03, PNorm = 154.4037, GNorm = 0.1432, lr_0 = 1.4339e-04
Loss = 1.0027e-03, PNorm = 154.4070, GNorm = 0.0935, lr_0 = 1.4329e-04
Loss = 1.1113e-03, PNorm = 154.4091, GNorm = 0.1073, lr_0 = 1.4319e-04
Loss = 1.8298e-03, PNorm = 154.4130, GNorm = 0.1995, lr_0 = 1.4310e-04
Loss = 1.1041e-03, PNorm = 154.4167, GNorm = 0.2562, lr_0 = 1.4300e-04
Loss = 2.0715e-03, PNorm = 154.4180, GNorm = 0.1021, lr_0 = 1.4290e-04
Loss = 8.5671e-04, PNorm = 154.4193, GNorm = 0.0532, lr_0 = 1.4280e-04
Loss = 1.0462e-03, PNorm = 154.4209, GNorm = 0.0371, lr_0 = 1.4270e-04
Loss = 1.2279e-03, PNorm = 154.4219, GNorm = 0.2011, lr_0 = 1.4261e-04
Loss = 2.4131e-03, PNorm = 154.4244, GNorm = 0.2399, lr_0 = 1.4251e-04
Loss = 3.4080e-03, PNorm = 154.4270, GNorm = 0.0275, lr_0 = 1.4241e-04
Loss = 7.1298e-04, PNorm = 154.4290, GNorm = 0.0528, lr_0 = 1.4231e-04
Loss = 4.1787e-03, PNorm = 154.4308, GNorm = 0.0910, lr_0 = 1.4222e-04
Loss = 8.8194e-04, PNorm = 154.4325, GNorm = 0.0905, lr_0 = 1.4212e-04
Loss = 8.8334e-04, PNorm = 154.4358, GNorm = 0.2074, lr_0 = 1.4202e-04
Loss = 3.1640e-03, PNorm = 154.4401, GNorm = 0.0360, lr_0 = 1.4192e-04
Loss = 1.3998e-03, PNorm = 154.4429, GNorm = 0.0735, lr_0 = 1.4183e-04
Loss = 3.3401e-03, PNorm = 154.4457, GNorm = 0.0864, lr_0 = 1.4173e-04
Loss = 1.1560e-03, PNorm = 154.4490, GNorm = 0.2224, lr_0 = 1.4163e-04
Loss = 9.1307e-04, PNorm = 154.4504, GNorm = 0.1070, lr_0 = 1.4153e-04
Loss = 3.2414e-03, PNorm = 154.4535, GNorm = 0.2444, lr_0 = 1.4144e-04
Loss = 1.2758e-03, PNorm = 154.4551, GNorm = 0.0602, lr_0 = 1.4134e-04
Loss = 8.4353e-04, PNorm = 154.4567, GNorm = 0.1151, lr_0 = 1.4124e-04
Loss = 1.9463e-03, PNorm = 154.4562, GNorm = 0.1475, lr_0 = 1.4115e-04
Loss = 1.8390e-03, PNorm = 154.4571, GNorm = 0.1382, lr_0 = 1.4105e-04
Loss = 1.1297e-03, PNorm = 154.4594, GNorm = 0.1213, lr_0 = 1.4095e-04
Loss = 2.4763e-03, PNorm = 154.4636, GNorm = 0.1001, lr_0 = 1.4086e-04
Loss = 1.8655e-03, PNorm = 154.4670, GNorm = 0.1460, lr_0 = 1.4076e-04
Loss = 9.9992e-04, PNorm = 154.4672, GNorm = 0.2019, lr_0 = 1.4066e-04
Loss = 7.9215e-04, PNorm = 154.4679, GNorm = 0.0949, lr_0 = 1.4057e-04
Loss = 9.2624e-04, PNorm = 154.4708, GNorm = 0.2113, lr_0 = 1.4047e-04
Loss = 1.7577e-03, PNorm = 154.4731, GNorm = 0.0563, lr_0 = 1.4038e-04
Loss = 1.5778e-03, PNorm = 154.4737, GNorm = 0.1327, lr_0 = 1.4028e-04
Loss = 1.2231e-03, PNorm = 154.4762, GNorm = 0.1056, lr_0 = 1.4018e-04
Loss = 1.7557e-03, PNorm = 154.4761, GNorm = 0.2546, lr_0 = 1.4009e-04
Loss = 1.5170e-03, PNorm = 154.4776, GNorm = 0.0800, lr_0 = 1.3999e-04
Loss = 1.2439e-03, PNorm = 154.4788, GNorm = 0.3655, lr_0 = 1.3990e-04
Loss = 8.8213e-04, PNorm = 154.4818, GNorm = 0.1536, lr_0 = 1.3980e-04
Loss = 1.4824e-03, PNorm = 154.4841, GNorm = 0.1296, lr_0 = 1.3970e-04
Loss = 8.6756e-04, PNorm = 154.4869, GNorm = 0.1549, lr_0 = 1.3961e-04
Loss = 4.3315e-03, PNorm = 154.4899, GNorm = 0.0840, lr_0 = 1.3951e-04
Loss = 2.0244e-03, PNorm = 154.4939, GNorm = 0.0349, lr_0 = 1.3942e-04
Loss = 1.9287e-03, PNorm = 154.4973, GNorm = 0.0762, lr_0 = 1.3932e-04
Loss = 1.2993e-03, PNorm = 154.4992, GNorm = 0.1042, lr_0 = 1.3923e-04
Loss = 1.6388e-03, PNorm = 154.5005, GNorm = 0.0495, lr_0 = 1.3913e-04
Loss = 3.2833e-03, PNorm = 154.5039, GNorm = 0.0642, lr_0 = 1.3904e-04
Loss = 2.4152e-03, PNorm = 154.5062, GNorm = 0.0830, lr_0 = 1.3894e-04
Validation mae = 0.474467
Epoch 26
Loss = 3.7527e-03, PNorm = 154.5062, GNorm = 0.1234, lr_0 = 1.3884e-04
Loss = 1.3790e-03, PNorm = 154.5079, GNorm = 0.1045, lr_0 = 1.3875e-04
Loss = 7.3515e-04, PNorm = 154.5094, GNorm = 0.0622, lr_0 = 1.3865e-04
Loss = 8.0162e-04, PNorm = 154.5104, GNorm = 0.0759, lr_0 = 1.3856e-04
Loss = 1.5159e-03, PNorm = 154.5108, GNorm = 0.0496, lr_0 = 1.3846e-04
Loss = 2.1781e-03, PNorm = 154.5122, GNorm = 0.0557, lr_0 = 1.3837e-04
Loss = 8.5073e-04, PNorm = 154.5141, GNorm = 0.1681, lr_0 = 1.3828e-04
Loss = 2.4233e-03, PNorm = 154.5170, GNorm = 0.0506, lr_0 = 1.3818e-04
Loss = 2.5121e-03, PNorm = 154.5192, GNorm = 0.0583, lr_0 = 1.3809e-04
Loss = 3.2958e-03, PNorm = 154.5207, GNorm = 0.4524, lr_0 = 1.3799e-04
Loss = 1.5505e-03, PNorm = 154.5217, GNorm = 0.0938, lr_0 = 1.3790e-04
Loss = 8.3359e-04, PNorm = 154.5218, GNorm = 0.2357, lr_0 = 1.3780e-04
Loss = 1.5390e-03, PNorm = 154.5242, GNorm = 0.0959, lr_0 = 1.3771e-04
Loss = 8.0416e-04, PNorm = 154.5277, GNorm = 0.0663, lr_0 = 1.3761e-04
Loss = 9.2120e-04, PNorm = 154.5276, GNorm = 0.0373, lr_0 = 1.3752e-04
Loss = 6.8443e-04, PNorm = 154.5285, GNorm = 0.1177, lr_0 = 1.3742e-04
Loss = 6.2261e-04, PNorm = 154.5289, GNorm = 0.0776, lr_0 = 1.3733e-04
Loss = 1.7318e-03, PNorm = 154.5297, GNorm = 0.0906, lr_0 = 1.3724e-04
Loss = 8.3700e-04, PNorm = 154.5308, GNorm = 0.0413, lr_0 = 1.3714e-04
Loss = 2.3565e-03, PNorm = 154.5321, GNorm = 0.0836, lr_0 = 1.3705e-04
Loss = 8.8628e-04, PNorm = 154.5354, GNorm = 0.1080, lr_0 = 1.3695e-04
Loss = 1.1686e-03, PNorm = 154.5365, GNorm = 0.0282, lr_0 = 1.3686e-04
Loss = 1.4594e-03, PNorm = 154.5386, GNorm = 0.0509, lr_0 = 1.3677e-04
Loss = 1.2023e-03, PNorm = 154.5406, GNorm = 0.0297, lr_0 = 1.3667e-04
Loss = 9.9614e-04, PNorm = 154.5417, GNorm = 0.2339, lr_0 = 1.3658e-04
Loss = 1.2895e-03, PNorm = 154.5434, GNorm = 0.0797, lr_0 = 1.3649e-04
Loss = 1.0963e-03, PNorm = 154.5449, GNorm = 0.1162, lr_0 = 1.3639e-04
Loss = 4.1934e-03, PNorm = 154.5473, GNorm = 0.0624, lr_0 = 1.3630e-04
Loss = 9.0755e-04, PNorm = 154.5480, GNorm = 0.0495, lr_0 = 1.3621e-04
Loss = 1.6078e-03, PNorm = 154.5497, GNorm = 0.1942, lr_0 = 1.3611e-04
Loss = 1.2709e-03, PNorm = 154.5525, GNorm = 0.0868, lr_0 = 1.3602e-04
Loss = 7.1001e-04, PNorm = 154.5544, GNorm = 0.0339, lr_0 = 1.3593e-04
Loss = 1.1503e-03, PNorm = 154.5569, GNorm = 0.0357, lr_0 = 1.3583e-04
Loss = 6.7857e-04, PNorm = 154.5581, GNorm = 0.0779, lr_0 = 1.3574e-04
Loss = 1.1828e-03, PNorm = 154.5592, GNorm = 0.0717, lr_0 = 1.3565e-04
Loss = 7.0210e-04, PNorm = 154.5602, GNorm = 0.0391, lr_0 = 1.3555e-04
Loss = 8.8570e-04, PNorm = 154.5617, GNorm = 0.1150, lr_0 = 1.3546e-04
Loss = 1.0254e-03, PNorm = 154.5639, GNorm = 0.1396, lr_0 = 1.3537e-04
Loss = 7.2696e-04, PNorm = 154.5674, GNorm = 0.0783, lr_0 = 1.3528e-04
Loss = 6.6280e-04, PNorm = 154.5694, GNorm = 0.0988, lr_0 = 1.3518e-04
Loss = 6.4050e-04, PNorm = 154.5704, GNorm = 0.0749, lr_0 = 1.3509e-04
Loss = 1.8441e-03, PNorm = 154.5710, GNorm = 0.1460, lr_0 = 1.3500e-04
Loss = 8.7245e-04, PNorm = 154.5710, GNorm = 0.0971, lr_0 = 1.3491e-04
Loss = 8.8632e-04, PNorm = 154.5737, GNorm = 0.0502, lr_0 = 1.3481e-04
Loss = 1.3599e-03, PNorm = 154.5755, GNorm = 0.0500, lr_0 = 1.3472e-04
Loss = 1.4555e-03, PNorm = 154.5783, GNorm = 0.0535, lr_0 = 1.3463e-04
Loss = 4.5417e-03, PNorm = 154.5811, GNorm = 0.0460, lr_0 = 1.3454e-04
Loss = 1.7645e-03, PNorm = 154.5834, GNorm = 0.1822, lr_0 = 1.3444e-04
Loss = 8.8190e-04, PNorm = 154.5848, GNorm = 0.1143, lr_0 = 1.3435e-04
Loss = 1.0348e-03, PNorm = 154.5866, GNorm = 0.1341, lr_0 = 1.3426e-04
Loss = 2.2219e-03, PNorm = 154.5885, GNorm = 0.3887, lr_0 = 1.3417e-04
Loss = 8.2686e-04, PNorm = 154.5890, GNorm = 0.0612, lr_0 = 1.3408e-04
Loss = 7.2467e-04, PNorm = 154.5910, GNorm = 0.0874, lr_0 = 1.3398e-04
Loss = 8.6442e-04, PNorm = 154.5922, GNorm = 0.0827, lr_0 = 1.3389e-04
Loss = 1.3002e-03, PNorm = 154.5948, GNorm = 0.1946, lr_0 = 1.3380e-04
Loss = 1.0696e-03, PNorm = 154.5971, GNorm = 0.0779, lr_0 = 1.3371e-04
Loss = 7.8519e-04, PNorm = 154.5993, GNorm = 0.1392, lr_0 = 1.3362e-04
Loss = 8.4025e-04, PNorm = 154.6018, GNorm = 0.0272, lr_0 = 1.3353e-04
Loss = 1.0993e-03, PNorm = 154.6034, GNorm = 0.0220, lr_0 = 1.3343e-04
Loss = 8.4078e-04, PNorm = 154.6036, GNorm = 0.0857, lr_0 = 1.3334e-04
Loss = 1.4005e-03, PNorm = 154.6042, GNorm = 0.0598, lr_0 = 1.3325e-04
Loss = 2.1619e-03, PNorm = 154.6060, GNorm = 0.2076, lr_0 = 1.3316e-04
Loss = 2.2008e-03, PNorm = 154.6075, GNorm = 0.0861, lr_0 = 1.3307e-04
Loss = 1.2625e-03, PNorm = 154.6103, GNorm = 0.1406, lr_0 = 1.3298e-04
Loss = 1.0415e-03, PNorm = 154.6120, GNorm = 0.1297, lr_0 = 1.3289e-04
Loss = 1.9320e-03, PNorm = 154.6100, GNorm = 0.2115, lr_0 = 1.3280e-04
Loss = 8.5790e-04, PNorm = 154.6118, GNorm = 0.1171, lr_0 = 1.3270e-04
Loss = 1.2715e-03, PNorm = 154.6126, GNorm = 0.0907, lr_0 = 1.3261e-04
Loss = 1.6968e-03, PNorm = 154.6128, GNorm = 0.0354, lr_0 = 1.3252e-04
Loss = 2.0961e-03, PNorm = 154.6149, GNorm = 0.0568, lr_0 = 1.3243e-04
Loss = 1.4031e-03, PNorm = 154.6171, GNorm = 0.1050, lr_0 = 1.3234e-04
Loss = 7.5878e-04, PNorm = 154.6182, GNorm = 0.0565, lr_0 = 1.3225e-04
Loss = 1.7600e-03, PNorm = 154.6194, GNorm = 0.0662, lr_0 = 1.3216e-04
Loss = 1.7294e-03, PNorm = 154.6196, GNorm = 0.1369, lr_0 = 1.3207e-04
Loss = 9.3163e-04, PNorm = 154.6215, GNorm = 0.0368, lr_0 = 1.3198e-04
Loss = 8.0218e-04, PNorm = 154.6235, GNorm = 0.0587, lr_0 = 1.3189e-04
Loss = 2.2803e-03, PNorm = 154.6252, GNorm = 0.2118, lr_0 = 1.3180e-04
Loss = 1.1160e-03, PNorm = 154.6260, GNorm = 0.0704, lr_0 = 1.3171e-04
Loss = 2.5486e-03, PNorm = 154.6306, GNorm = 0.1021, lr_0 = 1.3162e-04
Loss = 2.6754e-03, PNorm = 154.6316, GNorm = 0.0919, lr_0 = 1.3153e-04
Loss = 2.0813e-03, PNorm = 154.6345, GNorm = 0.0712, lr_0 = 1.3144e-04
Loss = 9.5104e-04, PNorm = 154.6377, GNorm = 0.2164, lr_0 = 1.3135e-04
Loss = 1.2638e-03, PNorm = 154.6403, GNorm = 0.0888, lr_0 = 1.3126e-04
Loss = 5.6649e-04, PNorm = 154.6428, GNorm = 0.0826, lr_0 = 1.3117e-04
Loss = 1.1104e-03, PNorm = 154.6433, GNorm = 0.1922, lr_0 = 1.3108e-04
Loss = 3.1759e-03, PNorm = 154.6436, GNorm = 0.8671, lr_0 = 1.3099e-04
Loss = 3.0832e-03, PNorm = 154.6461, GNorm = 0.1705, lr_0 = 1.3090e-04
Loss = 1.3268e-03, PNorm = 154.6477, GNorm = 0.0642, lr_0 = 1.3081e-04
Loss = 1.6111e-03, PNorm = 154.6489, GNorm = 0.1047, lr_0 = 1.3072e-04
Loss = 1.0890e-03, PNorm = 154.6506, GNorm = 0.0972, lr_0 = 1.3063e-04
Loss = 8.8614e-04, PNorm = 154.6511, GNorm = 0.0761, lr_0 = 1.3054e-04
Loss = 1.3493e-03, PNorm = 154.6510, GNorm = 0.0290, lr_0 = 1.3045e-04
Loss = 9.5296e-04, PNorm = 154.6538, GNorm = 0.0939, lr_0 = 1.3036e-04
Loss = 7.7847e-04, PNorm = 154.6572, GNorm = 0.0683, lr_0 = 1.3027e-04
Loss = 9.4720e-04, PNorm = 154.6593, GNorm = 0.0278, lr_0 = 1.3018e-04
Loss = 7.2338e-04, PNorm = 154.6603, GNorm = 0.0456, lr_0 = 1.3009e-04
Loss = 7.2120e-04, PNorm = 154.6618, GNorm = 0.0480, lr_0 = 1.3000e-04
Loss = 8.3572e-04, PNorm = 154.6630, GNorm = 0.1869, lr_0 = 1.2992e-04
Loss = 1.5087e-03, PNorm = 154.6657, GNorm = 0.0687, lr_0 = 1.2983e-04
Loss = 1.0219e-03, PNorm = 154.6675, GNorm = 0.0917, lr_0 = 1.2974e-04
Loss = 3.1992e-03, PNorm = 154.6698, GNorm = 0.2634, lr_0 = 1.2965e-04
Loss = 8.5526e-04, PNorm = 154.6729, GNorm = 0.0872, lr_0 = 1.2956e-04
Loss = 1.6746e-03, PNorm = 154.6757, GNorm = 0.1225, lr_0 = 1.2947e-04
Loss = 1.1066e-03, PNorm = 154.6780, GNorm = 0.1487, lr_0 = 1.2938e-04
Loss = 1.6985e-03, PNorm = 154.6798, GNorm = 0.0925, lr_0 = 1.2929e-04
Loss = 1.1805e-03, PNorm = 154.6810, GNorm = 0.1070, lr_0 = 1.2921e-04
Loss = 9.3441e-04, PNorm = 154.6829, GNorm = 0.0653, lr_0 = 1.2912e-04
Loss = 1.4423e-03, PNorm = 154.6841, GNorm = 0.0878, lr_0 = 1.2903e-04
Loss = 1.3596e-03, PNorm = 154.6861, GNorm = 0.1095, lr_0 = 1.2894e-04
Loss = 1.0361e-03, PNorm = 154.6871, GNorm = 0.0725, lr_0 = 1.2885e-04
Loss = 2.3154e-03, PNorm = 154.6890, GNorm = 0.1934, lr_0 = 1.2876e-04
Loss = 1.6041e-03, PNorm = 154.6903, GNorm = 0.1321, lr_0 = 1.2867e-04
Loss = 1.4874e-03, PNorm = 154.6915, GNorm = 0.0763, lr_0 = 1.2859e-04
Loss = 1.2364e-03, PNorm = 154.6951, GNorm = 0.1144, lr_0 = 1.2850e-04
Loss = 9.3521e-04, PNorm = 154.6971, GNorm = 0.1263, lr_0 = 1.2841e-04
Loss = 7.6534e-04, PNorm = 154.6983, GNorm = 0.0540, lr_0 = 1.2832e-04
Loss = 2.4594e-03, PNorm = 154.7004, GNorm = 0.6831, lr_0 = 1.2823e-04
Loss = 1.1138e-03, PNorm = 154.7026, GNorm = 0.0742, lr_0 = 1.2815e-04
Loss = 7.8200e-04, PNorm = 154.7056, GNorm = 0.0691, lr_0 = 1.2806e-04
Loss = 1.1335e-03, PNorm = 154.7074, GNorm = 0.0557, lr_0 = 1.2797e-04
Validation mae = 0.474673
Epoch 27
Loss = 9.4438e-04, PNorm = 154.7084, GNorm = 0.1471, lr_0 = 1.2788e-04
Loss = 5.9257e-04, PNorm = 154.7087, GNorm = 0.1303, lr_0 = 1.2780e-04
Loss = 6.5120e-04, PNorm = 154.7093, GNorm = 0.1004, lr_0 = 1.2771e-04
Loss = 1.2429e-03, PNorm = 154.7105, GNorm = 0.0342, lr_0 = 1.2762e-04
Loss = 5.2753e-04, PNorm = 154.7115, GNorm = 0.0272, lr_0 = 1.2753e-04
Loss = 6.1089e-04, PNorm = 154.7111, GNorm = 0.0563, lr_0 = 1.2745e-04
Loss = 1.8189e-03, PNorm = 154.7110, GNorm = 0.0880, lr_0 = 1.2736e-04
Loss = 1.6413e-03, PNorm = 154.7122, GNorm = 0.0399, lr_0 = 1.2727e-04
Loss = 1.0096e-03, PNorm = 154.7159, GNorm = 0.0420, lr_0 = 1.2718e-04
Loss = 6.9005e-04, PNorm = 154.7181, GNorm = 0.0603, lr_0 = 1.2710e-04
Loss = 1.5227e-03, PNorm = 154.7181, GNorm = 0.1094, lr_0 = 1.2701e-04
Loss = 6.0664e-04, PNorm = 154.7204, GNorm = 0.0964, lr_0 = 1.2692e-04
Loss = 1.3247e-03, PNorm = 154.7219, GNorm = 0.1554, lr_0 = 1.2684e-04
Loss = 8.5031e-04, PNorm = 154.7238, GNorm = 0.1555, lr_0 = 1.2675e-04
Loss = 1.7819e-03, PNorm = 154.7251, GNorm = 0.1296, lr_0 = 1.2666e-04
Loss = 1.6619e-03, PNorm = 154.7266, GNorm = 0.0495, lr_0 = 1.2658e-04
Loss = 2.2196e-03, PNorm = 154.7262, GNorm = 0.0838, lr_0 = 1.2649e-04
Loss = 1.0666e-03, PNorm = 154.7284, GNorm = 0.0394, lr_0 = 1.2640e-04
Loss = 7.7006e-04, PNorm = 154.7308, GNorm = 0.0877, lr_0 = 1.2632e-04
Loss = 7.1407e-04, PNorm = 154.7330, GNorm = 0.0448, lr_0 = 1.2623e-04
Loss = 1.2493e-03, PNorm = 154.7331, GNorm = 0.0538, lr_0 = 1.2614e-04
Loss = 8.0775e-04, PNorm = 154.7340, GNorm = 0.0398, lr_0 = 1.2606e-04
Loss = 7.4551e-04, PNorm = 154.7365, GNorm = 0.1722, lr_0 = 1.2597e-04
Loss = 1.0101e-03, PNorm = 154.7385, GNorm = 0.0475, lr_0 = 1.2588e-04
Loss = 6.3530e-04, PNorm = 154.7392, GNorm = 0.0508, lr_0 = 1.2580e-04
Loss = 4.9241e-04, PNorm = 154.7404, GNorm = 0.1417, lr_0 = 1.2571e-04
Loss = 1.9481e-03, PNorm = 154.7399, GNorm = 0.1256, lr_0 = 1.2563e-04
Loss = 9.5774e-04, PNorm = 154.7407, GNorm = 0.0446, lr_0 = 1.2554e-04
Loss = 8.0672e-04, PNorm = 154.7428, GNorm = 0.0462, lr_0 = 1.2545e-04
Loss = 8.7978e-04, PNorm = 154.7453, GNorm = 0.1750, lr_0 = 1.2537e-04
Loss = 1.2371e-03, PNorm = 154.7464, GNorm = 0.0703, lr_0 = 1.2528e-04
Loss = 1.3906e-03, PNorm = 154.7485, GNorm = 0.0353, lr_0 = 1.2520e-04
Loss = 1.1458e-03, PNorm = 154.7494, GNorm = 0.1545, lr_0 = 1.2511e-04
Loss = 6.4092e-04, PNorm = 154.7512, GNorm = 0.0562, lr_0 = 1.2502e-04
Loss = 7.5279e-04, PNorm = 154.7524, GNorm = 0.0666, lr_0 = 1.2494e-04
Loss = 1.3366e-03, PNorm = 154.7531, GNorm = 0.1043, lr_0 = 1.2485e-04
Loss = 9.6878e-04, PNorm = 154.7534, GNorm = 0.0399, lr_0 = 1.2477e-04
Loss = 2.0022e-03, PNorm = 154.7545, GNorm = 0.1004, lr_0 = 1.2468e-04
Loss = 2.5102e-03, PNorm = 154.7565, GNorm = 0.1732, lr_0 = 1.2460e-04
Loss = 2.6852e-03, PNorm = 154.7574, GNorm = 0.1067, lr_0 = 1.2451e-04
Loss = 5.3048e-04, PNorm = 154.7567, GNorm = 0.0158, lr_0 = 1.2443e-04
Loss = 8.3978e-04, PNorm = 154.7566, GNorm = 0.0452, lr_0 = 1.2434e-04
Loss = 6.1173e-04, PNorm = 154.7568, GNorm = 0.0633, lr_0 = 1.2426e-04
Loss = 8.3738e-04, PNorm = 154.7579, GNorm = 0.1013, lr_0 = 1.2417e-04
Loss = 7.1932e-04, PNorm = 154.7608, GNorm = 0.0782, lr_0 = 1.2409e-04
Loss = 1.3316e-03, PNorm = 154.7630, GNorm = 0.1542, lr_0 = 1.2400e-04
Loss = 9.3676e-04, PNorm = 154.7632, GNorm = 0.1095, lr_0 = 1.2392e-04
Loss = 1.7979e-03, PNorm = 154.7661, GNorm = 0.0401, lr_0 = 1.2383e-04
Loss = 2.3542e-03, PNorm = 154.7686, GNorm = 0.0492, lr_0 = 1.2375e-04
Loss = 5.6230e-04, PNorm = 154.7712, GNorm = 0.0725, lr_0 = 1.2366e-04
Loss = 1.7190e-03, PNorm = 154.7735, GNorm = 0.1127, lr_0 = 1.2358e-04
Loss = 9.7767e-04, PNorm = 154.7763, GNorm = 0.1797, lr_0 = 1.2349e-04
Loss = 1.6409e-03, PNorm = 154.7783, GNorm = 0.0789, lr_0 = 1.2341e-04
Loss = 9.0782e-04, PNorm = 154.7793, GNorm = 0.1735, lr_0 = 1.2332e-04
Loss = 1.2931e-03, PNorm = 154.7808, GNorm = 0.0665, lr_0 = 1.2324e-04
Loss = 1.8358e-03, PNorm = 154.7806, GNorm = 0.2152, lr_0 = 1.2315e-04
Loss = 7.9102e-04, PNorm = 154.7807, GNorm = 0.0549, lr_0 = 1.2307e-04
Loss = 8.6635e-04, PNorm = 154.7824, GNorm = 0.0665, lr_0 = 1.2298e-04
Loss = 1.0117e-03, PNorm = 154.7833, GNorm = 0.0782, lr_0 = 1.2290e-04
Loss = 8.0982e-04, PNorm = 154.7851, GNorm = 0.0403, lr_0 = 1.2282e-04
Loss = 1.6044e-03, PNorm = 154.7860, GNorm = 0.0964, lr_0 = 1.2273e-04
Loss = 8.9759e-04, PNorm = 154.7873, GNorm = 0.0438, lr_0 = 1.2265e-04
Loss = 8.6008e-04, PNorm = 154.7884, GNorm = 0.1704, lr_0 = 1.2256e-04
Loss = 1.7277e-03, PNorm = 154.7891, GNorm = 0.0901, lr_0 = 1.2248e-04
Loss = 1.2599e-03, PNorm = 154.7911, GNorm = 0.1229, lr_0 = 1.2240e-04
Loss = 2.2821e-03, PNorm = 154.7916, GNorm = 0.0830, lr_0 = 1.2231e-04
Loss = 2.1087e-03, PNorm = 154.7920, GNorm = 0.1031, lr_0 = 1.2223e-04
Loss = 9.9346e-04, PNorm = 154.7935, GNorm = 0.1692, lr_0 = 1.2214e-04
Loss = 1.6310e-03, PNorm = 154.7956, GNorm = 0.0379, lr_0 = 1.2206e-04
Loss = 3.1567e-03, PNorm = 154.7965, GNorm = 0.0863, lr_0 = 1.2198e-04
Loss = 8.9661e-04, PNorm = 154.7982, GNorm = 0.0546, lr_0 = 1.2189e-04
Loss = 6.5015e-04, PNorm = 154.7997, GNorm = 0.0433, lr_0 = 1.2181e-04
Loss = 1.0686e-03, PNorm = 154.8005, GNorm = 0.0294, lr_0 = 1.2173e-04
Loss = 7.2003e-04, PNorm = 154.8010, GNorm = 0.1408, lr_0 = 1.2164e-04
Loss = 1.5152e-03, PNorm = 154.8018, GNorm = 0.1255, lr_0 = 1.2156e-04
Loss = 1.1735e-03, PNorm = 154.8015, GNorm = 0.1097, lr_0 = 1.2148e-04
Loss = 6.4992e-04, PNorm = 154.8022, GNorm = 0.1293, lr_0 = 1.2139e-04
Loss = 1.3440e-03, PNorm = 154.8040, GNorm = 0.0550, lr_0 = 1.2131e-04
Loss = 3.9684e-03, PNorm = 154.8065, GNorm = 0.1005, lr_0 = 1.2123e-04
Loss = 7.1759e-04, PNorm = 154.8071, GNorm = 0.0265, lr_0 = 1.2114e-04
Loss = 1.0557e-03, PNorm = 154.8089, GNorm = 0.0719, lr_0 = 1.2106e-04
Loss = 1.1882e-03, PNorm = 154.8098, GNorm = 0.0528, lr_0 = 1.2098e-04
Loss = 1.1359e-03, PNorm = 154.8109, GNorm = 0.0935, lr_0 = 1.2090e-04
Loss = 2.9336e-03, PNorm = 154.8122, GNorm = 0.0564, lr_0 = 1.2081e-04
Loss = 1.3554e-03, PNorm = 154.8142, GNorm = 0.0738, lr_0 = 1.2073e-04
Loss = 2.5707e-03, PNorm = 154.8157, GNorm = 0.1897, lr_0 = 1.2065e-04
Loss = 1.1856e-03, PNorm = 154.8170, GNorm = 0.0490, lr_0 = 1.2056e-04
Loss = 1.0795e-03, PNorm = 154.8181, GNorm = 0.1721, lr_0 = 1.2048e-04
Loss = 3.0356e-03, PNorm = 154.8194, GNorm = 0.0249, lr_0 = 1.2040e-04
Loss = 1.8636e-03, PNorm = 154.8221, GNorm = 0.1124, lr_0 = 1.2032e-04
Loss = 2.2090e-03, PNorm = 154.8243, GNorm = 0.1107, lr_0 = 1.2023e-04
Loss = 6.2832e-04, PNorm = 154.8255, GNorm = 0.0861, lr_0 = 1.2015e-04
Loss = 6.0755e-04, PNorm = 154.8278, GNorm = 0.0868, lr_0 = 1.2007e-04
Loss = 4.0470e-03, PNorm = 154.8297, GNorm = 0.0514, lr_0 = 1.1999e-04
Loss = 1.2445e-03, PNorm = 154.8316, GNorm = 0.0980, lr_0 = 1.1991e-04
Loss = 5.8488e-04, PNorm = 154.8336, GNorm = 0.1444, lr_0 = 1.1982e-04
Loss = 1.1967e-03, PNorm = 154.8359, GNorm = 0.0232, lr_0 = 1.1974e-04
Loss = 1.1444e-03, PNorm = 154.8369, GNorm = 0.0783, lr_0 = 1.1966e-04
Loss = 1.5707e-03, PNorm = 154.8378, GNorm = 0.0938, lr_0 = 1.1958e-04
Loss = 6.3406e-04, PNorm = 154.8385, GNorm = 0.0300, lr_0 = 1.1950e-04
Loss = 1.4832e-03, PNorm = 154.8402, GNorm = 0.0444, lr_0 = 1.1941e-04
Loss = 7.6949e-04, PNorm = 154.8419, GNorm = 0.0384, lr_0 = 1.1933e-04
Loss = 7.4775e-04, PNorm = 154.8417, GNorm = 0.1323, lr_0 = 1.1925e-04
Loss = 1.6489e-03, PNorm = 154.8440, GNorm = 0.0758, lr_0 = 1.1917e-04
Loss = 1.1785e-03, PNorm = 154.8445, GNorm = 0.0565, lr_0 = 1.1909e-04
Loss = 9.2866e-04, PNorm = 154.8454, GNorm = 0.0826, lr_0 = 1.1901e-04
Loss = 1.2133e-03, PNorm = 154.8467, GNorm = 0.1924, lr_0 = 1.1892e-04
Loss = 1.0853e-03, PNorm = 154.8485, GNorm = 0.2570, lr_0 = 1.1884e-04
Loss = 6.5756e-04, PNorm = 154.8524, GNorm = 0.0788, lr_0 = 1.1876e-04
Loss = 1.0036e-03, PNorm = 154.8548, GNorm = 0.0422, lr_0 = 1.1868e-04
Loss = 1.1287e-03, PNorm = 154.8577, GNorm = 0.0441, lr_0 = 1.1860e-04
Loss = 5.0415e-04, PNorm = 154.8594, GNorm = 0.0353, lr_0 = 1.1852e-04
Loss = 6.5975e-04, PNorm = 154.8603, GNorm = 0.0846, lr_0 = 1.1844e-04
Loss = 2.5722e-03, PNorm = 154.8610, GNorm = 0.1088, lr_0 = 1.1835e-04
Loss = 1.1623e-03, PNorm = 154.8633, GNorm = 0.0759, lr_0 = 1.1827e-04
Loss = 1.0661e-03, PNorm = 154.8673, GNorm = 0.0585, lr_0 = 1.1819e-04
Loss = 8.2758e-04, PNorm = 154.8698, GNorm = 0.0569, lr_0 = 1.1811e-04
Loss = 8.9514e-04, PNorm = 154.8719, GNorm = 0.0506, lr_0 = 1.1803e-04
Loss = 2.6882e-03, PNorm = 154.8730, GNorm = 0.0568, lr_0 = 1.1795e-04
Loss = 1.1648e-03, PNorm = 154.8737, GNorm = 0.0531, lr_0 = 1.1787e-04
Validation mae = 0.474467
Epoch 28
Loss = 1.4633e-03, PNorm = 154.8761, GNorm = 0.1715, lr_0 = 1.1779e-04
Loss = 1.6472e-03, PNorm = 154.8770, GNorm = 0.1514, lr_0 = 1.1771e-04
Loss = 1.9447e-03, PNorm = 154.8796, GNorm = 0.0450, lr_0 = 1.1763e-04
Loss = 1.0782e-03, PNorm = 154.8812, GNorm = 0.1220, lr_0 = 1.1755e-04
Loss = 1.7901e-03, PNorm = 154.8824, GNorm = 0.0726, lr_0 = 1.1747e-04
Loss = 5.9934e-04, PNorm = 154.8849, GNorm = 0.0384, lr_0 = 1.1739e-04
Loss = 6.1778e-04, PNorm = 154.8859, GNorm = 0.1211, lr_0 = 1.1730e-04
Loss = 6.1000e-04, PNorm = 154.8863, GNorm = 0.0702, lr_0 = 1.1722e-04
Loss = 2.3506e-03, PNorm = 154.8860, GNorm = 0.0979, lr_0 = 1.1714e-04
Loss = 1.2325e-03, PNorm = 154.8854, GNorm = 0.0568, lr_0 = 1.1706e-04
Loss = 1.2172e-03, PNorm = 154.8857, GNorm = 0.2133, lr_0 = 1.1698e-04
Loss = 1.1448e-03, PNorm = 154.8858, GNorm = 0.0277, lr_0 = 1.1690e-04
Loss = 1.4229e-03, PNorm = 154.8868, GNorm = 0.0784, lr_0 = 1.1682e-04
Loss = 5.3439e-04, PNorm = 154.8896, GNorm = 0.0731, lr_0 = 1.1674e-04
Loss = 1.4215e-03, PNorm = 154.8912, GNorm = 0.0680, lr_0 = 1.1666e-04
Loss = 7.2877e-04, PNorm = 154.8912, GNorm = 0.1164, lr_0 = 1.1658e-04
Loss = 1.1386e-03, PNorm = 154.8915, GNorm = 0.0640, lr_0 = 1.1650e-04
Loss = 1.6316e-03, PNorm = 154.8927, GNorm = 0.1396, lr_0 = 1.1642e-04
Loss = 4.4247e-04, PNorm = 154.8934, GNorm = 0.0624, lr_0 = 1.1634e-04
Loss = 6.0396e-04, PNorm = 154.8936, GNorm = 0.1991, lr_0 = 1.1626e-04
Loss = 2.2968e-03, PNorm = 154.8953, GNorm = 0.7437, lr_0 = 1.1618e-04
Loss = 6.2330e-04, PNorm = 154.8947, GNorm = 0.1775, lr_0 = 1.1611e-04
Loss = 1.1746e-03, PNorm = 154.8969, GNorm = 0.2026, lr_0 = 1.1603e-04
Loss = 5.3226e-04, PNorm = 154.8973, GNorm = 0.0409, lr_0 = 1.1595e-04
Loss = 7.0622e-04, PNorm = 154.8986, GNorm = 0.0503, lr_0 = 1.1587e-04
Loss = 5.2040e-04, PNorm = 154.9009, GNorm = 0.0472, lr_0 = 1.1579e-04
Loss = 8.2114e-04, PNorm = 154.9031, GNorm = 0.0680, lr_0 = 1.1571e-04
Loss = 1.1472e-03, PNorm = 154.9046, GNorm = 0.0708, lr_0 = 1.1563e-04
Loss = 4.9990e-04, PNorm = 154.9059, GNorm = 0.0347, lr_0 = 1.1555e-04
Loss = 8.1423e-04, PNorm = 154.9084, GNorm = 0.1870, lr_0 = 1.1547e-04
Loss = 1.7598e-03, PNorm = 154.9101, GNorm = 0.1672, lr_0 = 1.1539e-04
Loss = 1.5593e-03, PNorm = 154.9113, GNorm = 0.0301, lr_0 = 1.1531e-04
Loss = 2.0429e-03, PNorm = 154.9121, GNorm = 0.0339, lr_0 = 1.1523e-04
Loss = 7.9100e-04, PNorm = 154.9127, GNorm = 0.0713, lr_0 = 1.1515e-04
Loss = 5.8538e-04, PNorm = 154.9149, GNorm = 0.0213, lr_0 = 1.1508e-04
Loss = 1.0027e-03, PNorm = 154.9157, GNorm = 0.0442, lr_0 = 1.1500e-04
Loss = 7.2881e-04, PNorm = 154.9170, GNorm = 0.1061, lr_0 = 1.1492e-04
Loss = 6.1879e-04, PNorm = 154.9179, GNorm = 0.0457, lr_0 = 1.1484e-04
Loss = 8.5100e-04, PNorm = 154.9186, GNorm = 0.0873, lr_0 = 1.1476e-04
Loss = 6.9012e-04, PNorm = 154.9191, GNorm = 0.0700, lr_0 = 1.1468e-04
Loss = 5.6116e-04, PNorm = 154.9208, GNorm = 0.0660, lr_0 = 1.1460e-04
Loss = 2.0383e-03, PNorm = 154.9217, GNorm = 0.2269, lr_0 = 1.1452e-04
Loss = 4.9809e-04, PNorm = 154.9231, GNorm = 0.1092, lr_0 = 1.1445e-04
Loss = 4.2549e-03, PNorm = 154.9251, GNorm = 0.0600, lr_0 = 1.1437e-04
Loss = 6.6151e-04, PNorm = 154.9272, GNorm = 0.1003, lr_0 = 1.1429e-04
Loss = 1.5971e-03, PNorm = 154.9277, GNorm = 0.0793, lr_0 = 1.1421e-04
Loss = 2.8560e-03, PNorm = 154.9292, GNorm = 0.1073, lr_0 = 1.1413e-04
Loss = 1.8338e-03, PNorm = 154.9310, GNorm = 0.1219, lr_0 = 1.1405e-04
Loss = 9.2725e-04, PNorm = 154.9309, GNorm = 0.4059, lr_0 = 1.1398e-04
Loss = 1.1864e-03, PNorm = 154.9316, GNorm = 0.0443, lr_0 = 1.1390e-04
Loss = 1.4559e-03, PNorm = 154.9322, GNorm = 0.0357, lr_0 = 1.1382e-04
Loss = 3.2280e-03, PNorm = 154.9347, GNorm = 0.1097, lr_0 = 1.1374e-04
Loss = 7.5853e-04, PNorm = 154.9364, GNorm = 0.0380, lr_0 = 1.1366e-04
Loss = 6.4366e-04, PNorm = 154.9380, GNorm = 0.0595, lr_0 = 1.1359e-04
Loss = 3.3776e-03, PNorm = 154.9393, GNorm = 0.1320, lr_0 = 1.1351e-04
Loss = 7.8635e-04, PNorm = 154.9413, GNorm = 0.0402, lr_0 = 1.1343e-04
Loss = 5.1453e-04, PNorm = 154.9428, GNorm = 0.0187, lr_0 = 1.1335e-04
Loss = 6.7316e-04, PNorm = 154.9432, GNorm = 0.0427, lr_0 = 1.1328e-04
Loss = 4.9857e-04, PNorm = 154.9448, GNorm = 0.0668, lr_0 = 1.1320e-04
Loss = 4.4163e-04, PNorm = 154.9462, GNorm = 0.0847, lr_0 = 1.1312e-04
Loss = 1.4679e-03, PNorm = 154.9476, GNorm = 0.0368, lr_0 = 1.1304e-04
Loss = 6.3608e-04, PNorm = 154.9496, GNorm = 0.0751, lr_0 = 1.1297e-04
Loss = 1.2060e-03, PNorm = 154.9511, GNorm = 0.0660, lr_0 = 1.1289e-04
Loss = 6.4321e-04, PNorm = 154.9520, GNorm = 0.0837, lr_0 = 1.1281e-04
Loss = 5.2822e-04, PNorm = 154.9530, GNorm = 0.0308, lr_0 = 1.1273e-04
Loss = 5.4237e-04, PNorm = 154.9531, GNorm = 0.0735, lr_0 = 1.1266e-04
Loss = 5.6959e-04, PNorm = 154.9549, GNorm = 0.0262, lr_0 = 1.1258e-04
Loss = 4.5040e-04, PNorm = 154.9553, GNorm = 0.1044, lr_0 = 1.1250e-04
Loss = 1.0892e-03, PNorm = 154.9568, GNorm = 0.0538, lr_0 = 1.1243e-04
Loss = 6.8275e-04, PNorm = 154.9576, GNorm = 0.1335, lr_0 = 1.1235e-04
Loss = 2.0025e-03, PNorm = 154.9584, GNorm = 0.1015, lr_0 = 1.1227e-04
Loss = 1.4249e-03, PNorm = 154.9590, GNorm = 0.2821, lr_0 = 1.1219e-04
Loss = 4.7467e-04, PNorm = 154.9615, GNorm = 0.0741, lr_0 = 1.1212e-04
Loss = 6.7103e-04, PNorm = 154.9630, GNorm = 0.0321, lr_0 = 1.1204e-04
Loss = 1.0129e-03, PNorm = 154.9648, GNorm = 0.0555, lr_0 = 1.1196e-04
Loss = 5.5500e-04, PNorm = 154.9656, GNorm = 0.0772, lr_0 = 1.1189e-04
Loss = 1.0291e-03, PNorm = 154.9679, GNorm = 0.2264, lr_0 = 1.1181e-04
Loss = 5.1867e-04, PNorm = 154.9688, GNorm = 0.1074, lr_0 = 1.1173e-04
Loss = 7.4418e-04, PNorm = 154.9704, GNorm = 0.0919, lr_0 = 1.1166e-04
Loss = 2.2436e-03, PNorm = 154.9739, GNorm = 0.0399, lr_0 = 1.1158e-04
Loss = 1.2352e-03, PNorm = 154.9764, GNorm = 0.0204, lr_0 = 1.1150e-04
Loss = 1.6610e-03, PNorm = 154.9770, GNorm = 0.0686, lr_0 = 1.1143e-04
Loss = 2.4156e-03, PNorm = 154.9771, GNorm = 0.0395, lr_0 = 1.1135e-04
Loss = 9.2833e-04, PNorm = 154.9784, GNorm = 0.1321, lr_0 = 1.1128e-04
Loss = 7.2892e-04, PNorm = 154.9784, GNorm = 0.0404, lr_0 = 1.1120e-04
Loss = 2.4347e-03, PNorm = 154.9799, GNorm = 0.0756, lr_0 = 1.1112e-04
Loss = 1.8975e-03, PNorm = 154.9812, GNorm = 0.1827, lr_0 = 1.1105e-04
Loss = 1.7468e-03, PNorm = 154.9814, GNorm = 0.1062, lr_0 = 1.1097e-04
Loss = 8.1769e-04, PNorm = 154.9835, GNorm = 0.0385, lr_0 = 1.1089e-04
Loss = 4.2548e-04, PNorm = 154.9854, GNorm = 0.0345, lr_0 = 1.1082e-04
Loss = 1.1159e-03, PNorm = 154.9874, GNorm = 0.0381, lr_0 = 1.1074e-04
Loss = 1.2258e-03, PNorm = 154.9867, GNorm = 0.1663, lr_0 = 1.1067e-04
Loss = 1.1193e-03, PNorm = 154.9882, GNorm = 0.0825, lr_0 = 1.1059e-04
Loss = 1.0838e-03, PNorm = 154.9891, GNorm = 0.0922, lr_0 = 1.1052e-04
Loss = 1.1783e-03, PNorm = 154.9904, GNorm = 0.1267, lr_0 = 1.1044e-04
Loss = 4.7357e-04, PNorm = 154.9925, GNorm = 0.1041, lr_0 = 1.1036e-04
Loss = 1.8954e-03, PNorm = 154.9922, GNorm = 0.0282, lr_0 = 1.1029e-04
Loss = 1.1107e-03, PNorm = 154.9929, GNorm = 0.1261, lr_0 = 1.1021e-04
Loss = 8.3179e-04, PNorm = 154.9942, GNorm = 0.0814, lr_0 = 1.1014e-04
Loss = 2.5249e-03, PNorm = 154.9947, GNorm = 0.0473, lr_0 = 1.1006e-04
Loss = 8.1777e-04, PNorm = 154.9957, GNorm = 0.0608, lr_0 = 1.0999e-04
Loss = 6.5020e-04, PNorm = 154.9970, GNorm = 0.0611, lr_0 = 1.0991e-04
Loss = 1.3147e-03, PNorm = 154.9983, GNorm = 0.0552, lr_0 = 1.0984e-04
Loss = 1.5060e-03, PNorm = 154.9993, GNorm = 0.0677, lr_0 = 1.0976e-04
Loss = 4.9113e-04, PNorm = 155.0000, GNorm = 0.1084, lr_0 = 1.0969e-04
Loss = 1.7544e-03, PNorm = 155.0016, GNorm = 0.0310, lr_0 = 1.0961e-04
Loss = 1.3836e-03, PNorm = 155.0032, GNorm = 0.0289, lr_0 = 1.0954e-04
Loss = 6.5913e-04, PNorm = 155.0059, GNorm = 0.0764, lr_0 = 1.0946e-04
Loss = 1.7811e-03, PNorm = 155.0071, GNorm = 0.0358, lr_0 = 1.0939e-04
Loss = 1.9465e-03, PNorm = 155.0088, GNorm = 0.1431, lr_0 = 1.0931e-04
Loss = 7.9359e-04, PNorm = 155.0107, GNorm = 0.0523, lr_0 = 1.0924e-04
Loss = 7.5716e-04, PNorm = 155.0118, GNorm = 0.0981, lr_0 = 1.0916e-04
Loss = 8.8037e-04, PNorm = 155.0138, GNorm = 0.0687, lr_0 = 1.0909e-04
Loss = 1.9900e-03, PNorm = 155.0149, GNorm = 0.1608, lr_0 = 1.0901e-04
Loss = 1.2186e-03, PNorm = 155.0179, GNorm = 0.0770, lr_0 = 1.0894e-04
Loss = 7.6161e-04, PNorm = 155.0195, GNorm = 0.0684, lr_0 = 1.0886e-04
Loss = 9.9339e-04, PNorm = 155.0211, GNorm = 0.1711, lr_0 = 1.0879e-04
Loss = 4.1669e-04, PNorm = 155.0226, GNorm = 0.0395, lr_0 = 1.0871e-04
Loss = 8.8389e-04, PNorm = 155.0225, GNorm = 0.0918, lr_0 = 1.0864e-04
Loss = 1.4163e-03, PNorm = 155.0250, GNorm = 0.0443, lr_0 = 1.0856e-04
Validation mae = 0.474033
Epoch 29
Loss = 1.0216e-03, PNorm = 155.0265, GNorm = 0.1468, lr_0 = 1.0849e-04
Loss = 5.3243e-04, PNorm = 155.0273, GNorm = 0.0737, lr_0 = 1.0841e-04
Loss = 2.2021e-03, PNorm = 155.0278, GNorm = 0.1254, lr_0 = 1.0834e-04
Loss = 4.4782e-04, PNorm = 155.0289, GNorm = 0.0659, lr_0 = 1.0827e-04
Loss = 4.6087e-04, PNorm = 155.0302, GNorm = 0.0265, lr_0 = 1.0819e-04
Loss = 1.6027e-03, PNorm = 155.0301, GNorm = 0.2685, lr_0 = 1.0812e-04
Loss = 6.2325e-04, PNorm = 155.0300, GNorm = 0.0256, lr_0 = 1.0804e-04
Loss = 2.0702e-03, PNorm = 155.0310, GNorm = 0.0656, lr_0 = 1.0797e-04
Loss = 6.9141e-04, PNorm = 155.0328, GNorm = 0.0650, lr_0 = 1.0790e-04
Loss = 4.3207e-04, PNorm = 155.0323, GNorm = 0.0574, lr_0 = 1.0782e-04
Loss = 8.2727e-04, PNorm = 155.0333, GNorm = 0.1068, lr_0 = 1.0775e-04
Loss = 6.2085e-04, PNorm = 155.0339, GNorm = 0.0590, lr_0 = 1.0767e-04
Loss = 1.2070e-03, PNorm = 155.0355, GNorm = 0.1406, lr_0 = 1.0760e-04
Loss = 7.0679e-04, PNorm = 155.0354, GNorm = 0.0683, lr_0 = 1.0753e-04
Loss = 5.4126e-04, PNorm = 155.0360, GNorm = 0.1115, lr_0 = 1.0745e-04
Loss = 5.7203e-04, PNorm = 155.0366, GNorm = 0.0907, lr_0 = 1.0738e-04
Loss = 7.0016e-04, PNorm = 155.0385, GNorm = 0.0712, lr_0 = 1.0731e-04
Loss = 1.3604e-03, PNorm = 155.0388, GNorm = 0.0321, lr_0 = 1.0723e-04
Loss = 9.8329e-04, PNorm = 155.0387, GNorm = 0.0435, lr_0 = 1.0716e-04
Loss = 9.1096e-04, PNorm = 155.0391, GNorm = 0.1054, lr_0 = 1.0709e-04
Loss = 1.5437e-03, PNorm = 155.0404, GNorm = 0.0967, lr_0 = 1.0701e-04
Loss = 1.0803e-03, PNorm = 155.0420, GNorm = 0.0707, lr_0 = 1.0694e-04
Loss = 2.0336e-03, PNorm = 155.0439, GNorm = 0.0419, lr_0 = 1.0687e-04
Loss = 1.7442e-03, PNorm = 155.0468, GNorm = 0.0357, lr_0 = 1.0679e-04
Loss = 1.9470e-03, PNorm = 155.0484, GNorm = 0.0699, lr_0 = 1.0672e-04
Loss = 1.6082e-03, PNorm = 155.0477, GNorm = 0.1426, lr_0 = 1.0665e-04
Loss = 2.0954e-03, PNorm = 155.0468, GNorm = 0.0668, lr_0 = 1.0657e-04
Loss = 1.4883e-03, PNorm = 155.0472, GNorm = 0.0464, lr_0 = 1.0650e-04
Loss = 1.1802e-03, PNorm = 155.0496, GNorm = 0.1421, lr_0 = 1.0643e-04
Loss = 1.3264e-03, PNorm = 155.0525, GNorm = 0.1267, lr_0 = 1.0635e-04
Loss = 7.1933e-04, PNorm = 155.0554, GNorm = 0.1020, lr_0 = 1.0628e-04
Loss = 6.5246e-04, PNorm = 155.0569, GNorm = 0.0196, lr_0 = 1.0621e-04
Loss = 4.5900e-04, PNorm = 155.0574, GNorm = 0.0709, lr_0 = 1.0614e-04
Loss = 1.0624e-03, PNorm = 155.0559, GNorm = 0.1184, lr_0 = 1.0606e-04
Loss = 6.4176e-04, PNorm = 155.0564, GNorm = 0.0291, lr_0 = 1.0599e-04
Loss = 5.2325e-04, PNorm = 155.0578, GNorm = 0.0375, lr_0 = 1.0592e-04
Loss = 1.4374e-03, PNorm = 155.0581, GNorm = 0.0812, lr_0 = 1.0585e-04
Loss = 9.8056e-04, PNorm = 155.0587, GNorm = 0.0501, lr_0 = 1.0577e-04
Loss = 4.7506e-04, PNorm = 155.0584, GNorm = 0.0534, lr_0 = 1.0570e-04
Loss = 9.8975e-04, PNorm = 155.0606, GNorm = 0.0662, lr_0 = 1.0563e-04
Loss = 4.2327e-04, PNorm = 155.0636, GNorm = 0.0608, lr_0 = 1.0556e-04
Loss = 1.2523e-03, PNorm = 155.0664, GNorm = 0.0418, lr_0 = 1.0548e-04
Loss = 6.0130e-04, PNorm = 155.0682, GNorm = 0.0261, lr_0 = 1.0541e-04
Loss = 3.9659e-04, PNorm = 155.0701, GNorm = 0.0867, lr_0 = 1.0534e-04
Loss = 8.2090e-04, PNorm = 155.0713, GNorm = 0.1038, lr_0 = 1.0527e-04
Loss = 3.0944e-03, PNorm = 155.0724, GNorm = 0.0632, lr_0 = 1.0519e-04
Loss = 7.2264e-04, PNorm = 155.0746, GNorm = 0.0985, lr_0 = 1.0512e-04
Loss = 1.2290e-03, PNorm = 155.0753, GNorm = 0.0609, lr_0 = 1.0505e-04
Loss = 4.1743e-04, PNorm = 155.0759, GNorm = 0.0922, lr_0 = 1.0498e-04
Loss = 5.3953e-04, PNorm = 155.0752, GNorm = 0.0628, lr_0 = 1.0491e-04
Loss = 3.7462e-04, PNorm = 155.0750, GNorm = 0.0715, lr_0 = 1.0483e-04
Loss = 4.0957e-04, PNorm = 155.0766, GNorm = 0.0332, lr_0 = 1.0476e-04
Loss = 1.7227e-03, PNorm = 155.0784, GNorm = 0.5506, lr_0 = 1.0469e-04
Loss = 4.1325e-04, PNorm = 155.0785, GNorm = 0.0610, lr_0 = 1.0462e-04
Loss = 9.7782e-04, PNorm = 155.0789, GNorm = 0.1625, lr_0 = 1.0455e-04
Loss = 4.1729e-04, PNorm = 155.0804, GNorm = 0.0988, lr_0 = 1.0448e-04
Loss = 4.8281e-04, PNorm = 155.0804, GNorm = 0.0532, lr_0 = 1.0440e-04
Loss = 9.6372e-04, PNorm = 155.0799, GNorm = 0.0952, lr_0 = 1.0433e-04
Loss = 5.7660e-04, PNorm = 155.0801, GNorm = 0.0427, lr_0 = 1.0426e-04
Loss = 1.0540e-03, PNorm = 155.0814, GNorm = 0.1237, lr_0 = 1.0419e-04
Loss = 1.4182e-03, PNorm = 155.0821, GNorm = 0.1435, lr_0 = 1.0412e-04
Loss = 8.1820e-04, PNorm = 155.0829, GNorm = 0.0464, lr_0 = 1.0405e-04
Loss = 1.0509e-03, PNorm = 155.0841, GNorm = 0.1694, lr_0 = 1.0398e-04
Loss = 5.1909e-04, PNorm = 155.0839, GNorm = 0.0804, lr_0 = 1.0391e-04
Loss = 6.9156e-04, PNorm = 155.0839, GNorm = 0.0837, lr_0 = 1.0383e-04
Loss = 2.2762e-03, PNorm = 155.0853, GNorm = 0.0910, lr_0 = 1.0376e-04
Loss = 7.8778e-04, PNorm = 155.0867, GNorm = 0.1274, lr_0 = 1.0369e-04
Loss = 5.3020e-04, PNorm = 155.0886, GNorm = 0.0474, lr_0 = 1.0362e-04
Loss = 8.2306e-04, PNorm = 155.0904, GNorm = 0.0323, lr_0 = 1.0355e-04
Loss = 1.4024e-03, PNorm = 155.0917, GNorm = 0.0669, lr_0 = 1.0348e-04
Loss = 8.4267e-04, PNorm = 155.0933, GNorm = 0.0502, lr_0 = 1.0341e-04
Loss = 4.8143e-04, PNorm = 155.0942, GNorm = 0.0598, lr_0 = 1.0334e-04
Loss = 6.8876e-04, PNorm = 155.0955, GNorm = 0.1049, lr_0 = 1.0327e-04
Loss = 1.7297e-03, PNorm = 155.0969, GNorm = 0.1054, lr_0 = 1.0320e-04
Loss = 1.5791e-03, PNorm = 155.0976, GNorm = 0.0464, lr_0 = 1.0312e-04
Loss = 7.7327e-04, PNorm = 155.0984, GNorm = 0.0362, lr_0 = 1.0305e-04
Loss = 1.6286e-03, PNorm = 155.0991, GNorm = 0.1547, lr_0 = 1.0298e-04
Loss = 5.2838e-04, PNorm = 155.0998, GNorm = 0.0417, lr_0 = 1.0291e-04
Loss = 4.7800e-04, PNorm = 155.1014, GNorm = 0.0468, lr_0 = 1.0284e-04
Loss = 2.0496e-03, PNorm = 155.1013, GNorm = 0.1103, lr_0 = 1.0277e-04
Loss = 2.2731e-03, PNorm = 155.1008, GNorm = 0.1116, lr_0 = 1.0270e-04
Loss = 2.9192e-03, PNorm = 155.1021, GNorm = 0.5014, lr_0 = 1.0263e-04
Loss = 4.3361e-04, PNorm = 155.1045, GNorm = 0.0810, lr_0 = 1.0256e-04
Loss = 2.1108e-03, PNorm = 155.1052, GNorm = 0.1706, lr_0 = 1.0249e-04
Loss = 5.7726e-04, PNorm = 155.1072, GNorm = 0.0326, lr_0 = 1.0242e-04
Loss = 5.9075e-04, PNorm = 155.1084, GNorm = 0.0575, lr_0 = 1.0235e-04
Loss = 4.7578e-04, PNorm = 155.1099, GNorm = 0.0825, lr_0 = 1.0228e-04
Loss = 6.0102e-04, PNorm = 155.1104, GNorm = 0.0996, lr_0 = 1.0221e-04
Loss = 1.0111e-03, PNorm = 155.1123, GNorm = 0.0823, lr_0 = 1.0214e-04
Loss = 1.4213e-03, PNorm = 155.1129, GNorm = 0.0401, lr_0 = 1.0207e-04
Loss = 5.7446e-04, PNorm = 155.1132, GNorm = 0.0370, lr_0 = 1.0200e-04
Loss = 5.4619e-04, PNorm = 155.1140, GNorm = 0.0522, lr_0 = 1.0193e-04
Loss = 2.6825e-03, PNorm = 155.1144, GNorm = 0.0729, lr_0 = 1.0186e-04
Loss = 2.6449e-03, PNorm = 155.1136, GNorm = 0.0645, lr_0 = 1.0179e-04
Loss = 2.5804e-03, PNorm = 155.1146, GNorm = 0.0510, lr_0 = 1.0172e-04
Loss = 1.6032e-03, PNorm = 155.1162, GNorm = 0.1110, lr_0 = 1.0165e-04
Loss = 7.4059e-04, PNorm = 155.1183, GNorm = 0.0401, lr_0 = 1.0158e-04
Loss = 4.4131e-04, PNorm = 155.1205, GNorm = 0.0815, lr_0 = 1.0151e-04
Loss = 3.8211e-03, PNorm = 155.1219, GNorm = 0.0917, lr_0 = 1.0144e-04
Loss = 5.2626e-04, PNorm = 155.1247, GNorm = 0.0398, lr_0 = 1.0137e-04
Loss = 8.1173e-04, PNorm = 155.1262, GNorm = 0.1112, lr_0 = 1.0130e-04
Loss = 1.9016e-03, PNorm = 155.1270, GNorm = 0.0788, lr_0 = 1.0123e-04
Loss = 1.1330e-03, PNorm = 155.1269, GNorm = 0.0292, lr_0 = 1.0116e-04
Loss = 1.7368e-03, PNorm = 155.1284, GNorm = 0.0740, lr_0 = 1.0110e-04
Loss = 1.5840e-03, PNorm = 155.1313, GNorm = 0.1231, lr_0 = 1.0103e-04
Loss = 7.6003e-04, PNorm = 155.1323, GNorm = 0.0395, lr_0 = 1.0096e-04
Loss = 4.7588e-04, PNorm = 155.1334, GNorm = 0.1379, lr_0 = 1.0089e-04
Loss = 7.3678e-04, PNorm = 155.1343, GNorm = 0.1018, lr_0 = 1.0082e-04
Loss = 4.0329e-04, PNorm = 155.1340, GNorm = 0.0464, lr_0 = 1.0075e-04
Loss = 1.4018e-03, PNorm = 155.1345, GNorm = 0.0710, lr_0 = 1.0068e-04
Loss = 4.3648e-04, PNorm = 155.1354, GNorm = 0.0286, lr_0 = 1.0061e-04
Loss = 8.3181e-04, PNorm = 155.1361, GNorm = 0.0580, lr_0 = 1.0054e-04
Loss = 3.9526e-04, PNorm = 155.1371, GNorm = 0.1125, lr_0 = 1.0047e-04
Loss = 1.6334e-03, PNorm = 155.1390, GNorm = 0.0876, lr_0 = 1.0041e-04
Loss = 3.4415e-03, PNorm = 155.1394, GNorm = 0.1157, lr_0 = 1.0034e-04
Loss = 7.0474e-04, PNorm = 155.1396, GNorm = 0.1433, lr_0 = 1.0027e-04
Loss = 5.3229e-04, PNorm = 155.1411, GNorm = 0.1281, lr_0 = 1.0020e-04
Loss = 9.3517e-04, PNorm = 155.1422, GNorm = 0.0251, lr_0 = 1.0013e-04
Loss = 2.2519e-03, PNorm = 155.1444, GNorm = 0.0597, lr_0 = 1.0006e-04
Loss = 5.1217e-04, PNorm = 155.1455, GNorm = 0.0936, lr_0 = 1.0000e-04
Validation mae = 0.474259
Model 0 best validation mae = 0.474033 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.454301
Ensemble test mae = 0.454301
Fold 3
Splitting data with seed 3
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 8.9869e-01, PNorm = 62.2447, GNorm = 1.5742, lr_0 = 1.0413e-04
Loss = 6.4917e-01, PNorm = 62.2572, GNorm = 2.3354, lr_0 = 1.0788e-04
Loss = 5.6855e-01, PNorm = 62.2706, GNorm = 1.6294, lr_0 = 1.1163e-04
Loss = 4.4976e-01, PNorm = 62.2827, GNorm = 1.7420, lr_0 = 1.1537e-04
Loss = 4.6432e-01, PNorm = 62.2922, GNorm = 2.4855, lr_0 = 1.1913e-04
Loss = 4.7195e-01, PNorm = 62.3008, GNorm = 2.0668, lr_0 = 1.2287e-04
Loss = 4.3206e-01, PNorm = 62.3113, GNorm = 1.9395, lr_0 = 1.2663e-04
Loss = 3.9741e-01, PNorm = 62.3207, GNorm = 1.9916, lr_0 = 1.3038e-04
Loss = 3.2718e-01, PNorm = 62.3278, GNorm = 2.2300, lr_0 = 1.3413e-04
Loss = 3.3984e-01, PNorm = 62.3357, GNorm = 1.9074, lr_0 = 1.3788e-04
Loss = 3.5720e-01, PNorm = 62.3455, GNorm = 1.9847, lr_0 = 1.4163e-04
Loss = 3.6443e-01, PNorm = 62.3569, GNorm = 2.1651, lr_0 = 1.4537e-04
Loss = 3.1219e-01, PNorm = 62.3685, GNorm = 2.0188, lr_0 = 1.4913e-04
Loss = 3.3121e-01, PNorm = 62.3793, GNorm = 1.9911, lr_0 = 1.5288e-04
Loss = 3.4045e-01, PNorm = 62.3883, GNorm = 2.2652, lr_0 = 1.5662e-04
Loss = 3.1200e-01, PNorm = 62.3977, GNorm = 2.5129, lr_0 = 1.6038e-04
Loss = 3.1659e-01, PNorm = 62.4112, GNorm = 1.6696, lr_0 = 1.6412e-04
Loss = 3.2084e-01, PNorm = 62.4226, GNorm = 1.6405, lr_0 = 1.6788e-04
Loss = 2.8845e-01, PNorm = 62.4326, GNorm = 1.6451, lr_0 = 1.7163e-04
Loss = 3.0474e-01, PNorm = 62.4455, GNorm = 1.9015, lr_0 = 1.7538e-04
Loss = 3.3582e-01, PNorm = 62.4567, GNorm = 3.8538, lr_0 = 1.7913e-04
Loss = 3.3730e-01, PNorm = 62.4654, GNorm = 2.2893, lr_0 = 1.8288e-04
Loss = 3.4892e-01, PNorm = 62.4783, GNorm = 2.2132, lr_0 = 1.8662e-04
Loss = 3.2215e-01, PNorm = 62.4934, GNorm = 1.4949, lr_0 = 1.9038e-04
Loss = 3.1209e-01, PNorm = 62.5064, GNorm = 1.2893, lr_0 = 1.9413e-04
Loss = 2.9304e-01, PNorm = 62.5203, GNorm = 1.4741, lr_0 = 1.9788e-04
Loss = 2.9916e-01, PNorm = 62.5340, GNorm = 1.6591, lr_0 = 2.0163e-04
Loss = 3.1452e-01, PNorm = 62.5481, GNorm = 1.9359, lr_0 = 2.0537e-04
Loss = 3.2307e-01, PNorm = 62.5619, GNorm = 1.5439, lr_0 = 2.0913e-04
Loss = 2.5219e-01, PNorm = 62.5760, GNorm = 2.7710, lr_0 = 2.1288e-04
Loss = 3.2062e-01, PNorm = 62.5927, GNorm = 2.6112, lr_0 = 2.1663e-04
Loss = 2.7501e-01, PNorm = 62.6103, GNorm = 2.3950, lr_0 = 2.2038e-04
Loss = 2.8929e-01, PNorm = 62.6233, GNorm = 1.8091, lr_0 = 2.2412e-04
Loss = 2.9550e-01, PNorm = 62.6395, GNorm = 2.1604, lr_0 = 2.2787e-04
Loss = 2.8721e-01, PNorm = 62.6569, GNorm = 1.7485, lr_0 = 2.3163e-04
Loss = 2.6939e-01, PNorm = 62.6737, GNorm = 1.4557, lr_0 = 2.3538e-04
Loss = 3.2629e-01, PNorm = 62.6919, GNorm = 1.9747, lr_0 = 2.3913e-04
Loss = 2.6438e-01, PNorm = 62.7088, GNorm = 1.9716, lr_0 = 2.4288e-04
Loss = 2.7955e-01, PNorm = 62.7293, GNorm = 2.8006, lr_0 = 2.4662e-04
Loss = 2.7590e-01, PNorm = 62.7445, GNorm = 1.9320, lr_0 = 2.5038e-04
Loss = 2.7277e-01, PNorm = 62.7638, GNorm = 1.5369, lr_0 = 2.5413e-04
Loss = 2.4710e-01, PNorm = 62.7828, GNorm = 1.7383, lr_0 = 2.5788e-04
Loss = 2.5826e-01, PNorm = 62.7976, GNorm = 1.3723, lr_0 = 2.6163e-04
Loss = 2.9576e-01, PNorm = 62.8194, GNorm = 1.1860, lr_0 = 2.6537e-04
Loss = 2.6703e-01, PNorm = 62.8394, GNorm = 1.3781, lr_0 = 2.6912e-04
Loss = 2.6569e-01, PNorm = 62.8589, GNorm = 1.1656, lr_0 = 2.7288e-04
Loss = 2.8655e-01, PNorm = 62.8796, GNorm = 1.1885, lr_0 = 2.7663e-04
Loss = 2.6395e-01, PNorm = 62.9043, GNorm = 1.3263, lr_0 = 2.8038e-04
Loss = 2.5379e-01, PNorm = 62.9257, GNorm = 1.1613, lr_0 = 2.8413e-04
Loss = 2.5482e-01, PNorm = 62.9461, GNorm = 2.2191, lr_0 = 2.8787e-04
Loss = 2.6514e-01, PNorm = 62.9705, GNorm = 1.3337, lr_0 = 2.9163e-04
Loss = 2.4397e-01, PNorm = 62.9905, GNorm = 1.0885, lr_0 = 2.9538e-04
Loss = 2.8042e-01, PNorm = 63.0120, GNorm = 1.6858, lr_0 = 2.9913e-04
Loss = 2.4348e-01, PNorm = 63.0354, GNorm = 1.1226, lr_0 = 3.0288e-04
Loss = 2.6691e-01, PNorm = 63.0585, GNorm = 1.5114, lr_0 = 3.0662e-04
Loss = 2.5954e-01, PNorm = 63.0840, GNorm = 1.7387, lr_0 = 3.1037e-04
Loss = 2.8313e-01, PNorm = 63.1098, GNorm = 1.0484, lr_0 = 3.1413e-04
Loss = 2.4616e-01, PNorm = 63.1368, GNorm = 1.6248, lr_0 = 3.1788e-04
Loss = 2.5359e-01, PNorm = 63.1646, GNorm = 1.2399, lr_0 = 3.2163e-04
Loss = 2.3064e-01, PNorm = 63.1898, GNorm = 1.1005, lr_0 = 3.2538e-04
Loss = 2.4458e-01, PNorm = 63.2122, GNorm = 1.5714, lr_0 = 3.2912e-04
Loss = 2.5396e-01, PNorm = 63.2384, GNorm = 1.1360, lr_0 = 3.3288e-04
Loss = 2.2304e-01, PNorm = 63.2648, GNorm = 1.0551, lr_0 = 3.3663e-04
Loss = 2.3166e-01, PNorm = 63.2914, GNorm = 0.9974, lr_0 = 3.4038e-04
Loss = 2.4261e-01, PNorm = 63.3176, GNorm = 1.0614, lr_0 = 3.4413e-04
Loss = 2.0848e-01, PNorm = 63.3469, GNorm = 1.4504, lr_0 = 3.4787e-04
Loss = 2.1185e-01, PNorm = 63.3711, GNorm = 1.0321, lr_0 = 3.5162e-04
Loss = 2.6125e-01, PNorm = 63.3989, GNorm = 1.1361, lr_0 = 3.5538e-04
Loss = 2.5750e-01, PNorm = 63.4300, GNorm = 1.9400, lr_0 = 3.5913e-04
Loss = 2.1271e-01, PNorm = 63.4603, GNorm = 1.0373, lr_0 = 3.6288e-04
Loss = 2.4511e-01, PNorm = 63.4897, GNorm = 1.2889, lr_0 = 3.6662e-04
Loss = 2.4639e-01, PNorm = 63.5203, GNorm = 1.6106, lr_0 = 3.7037e-04
Loss = 2.3992e-01, PNorm = 63.5536, GNorm = 1.2947, lr_0 = 3.7413e-04
Loss = 2.8026e-01, PNorm = 63.5877, GNorm = 1.5183, lr_0 = 3.7788e-04
Loss = 2.0466e-01, PNorm = 63.6269, GNorm = 1.2914, lr_0 = 3.8163e-04
Loss = 2.3461e-01, PNorm = 63.6595, GNorm = 1.3905, lr_0 = 3.8537e-04
Loss = 2.3854e-01, PNorm = 63.6951, GNorm = 1.1209, lr_0 = 3.8912e-04
Loss = 2.4504e-01, PNorm = 63.7291, GNorm = 1.1071, lr_0 = 3.9287e-04
Loss = 2.3710e-01, PNorm = 63.7585, GNorm = 1.5454, lr_0 = 3.9663e-04
Loss = 2.2823e-01, PNorm = 63.7921, GNorm = 1.2705, lr_0 = 4.0038e-04
Loss = 2.3872e-01, PNorm = 63.8244, GNorm = 1.0881, lr_0 = 4.0413e-04
Loss = 2.5660e-01, PNorm = 63.8607, GNorm = 1.7010, lr_0 = 4.0787e-04
Loss = 2.6895e-01, PNorm = 63.9007, GNorm = 1.3081, lr_0 = 4.1162e-04
Loss = 2.7420e-01, PNorm = 63.9392, GNorm = 1.1832, lr_0 = 4.1537e-04
Loss = 2.2557e-01, PNorm = 63.9788, GNorm = 1.0071, lr_0 = 4.1913e-04
Loss = 2.6586e-01, PNorm = 64.0144, GNorm = 1.6288, lr_0 = 4.2288e-04
Loss = 2.2301e-01, PNorm = 64.0594, GNorm = 1.4599, lr_0 = 4.2662e-04
Loss = 2.2510e-01, PNorm = 64.0913, GNorm = 1.1017, lr_0 = 4.3037e-04
Loss = 2.4267e-01, PNorm = 64.1295, GNorm = 1.0388, lr_0 = 4.3412e-04
Loss = 2.2406e-01, PNorm = 64.1670, GNorm = 1.4635, lr_0 = 4.3788e-04
Loss = 2.4656e-01, PNorm = 64.2086, GNorm = 1.0105, lr_0 = 4.4163e-04
Loss = 2.1675e-01, PNorm = 64.2494, GNorm = 0.9467, lr_0 = 4.4538e-04
Loss = 2.5037e-01, PNorm = 64.2908, GNorm = 1.1685, lr_0 = 4.4912e-04
Loss = 2.3164e-01, PNorm = 64.3329, GNorm = 0.8157, lr_0 = 4.5287e-04
Loss = 2.4780e-01, PNorm = 64.3786, GNorm = 1.2506, lr_0 = 4.5662e-04
Loss = 2.3893e-01, PNorm = 64.4269, GNorm = 1.4579, lr_0 = 4.6038e-04
Loss = 2.5548e-01, PNorm = 64.4705, GNorm = 0.9040, lr_0 = 4.6413e-04
Loss = 2.0876e-01, PNorm = 64.5226, GNorm = 0.8779, lr_0 = 4.6787e-04
Loss = 2.2004e-01, PNorm = 64.5682, GNorm = 0.8079, lr_0 = 4.7162e-04
Loss = 2.2375e-01, PNorm = 64.6144, GNorm = 1.5894, lr_0 = 4.7537e-04
Loss = 2.2385e-01, PNorm = 64.6616, GNorm = 0.9737, lr_0 = 4.7913e-04
Loss = 2.2690e-01, PNorm = 64.7074, GNorm = 1.1915, lr_0 = 4.8288e-04
Loss = 2.0280e-01, PNorm = 64.7442, GNorm = 0.7710, lr_0 = 4.8663e-04
Loss = 2.2325e-01, PNorm = 64.7904, GNorm = 1.4582, lr_0 = 4.9038e-04
Loss = 2.2987e-01, PNorm = 64.8308, GNorm = 1.5683, lr_0 = 4.9412e-04
Loss = 2.4006e-01, PNorm = 64.8833, GNorm = 1.1232, lr_0 = 4.9788e-04
Loss = 2.1754e-01, PNorm = 64.9339, GNorm = 1.2063, lr_0 = 5.0163e-04
Loss = 2.1477e-01, PNorm = 64.9805, GNorm = 1.0615, lr_0 = 5.0538e-04
Loss = 2.2810e-01, PNorm = 65.0374, GNorm = 1.0621, lr_0 = 5.0913e-04
Loss = 2.0733e-01, PNorm = 65.0869, GNorm = 0.8900, lr_0 = 5.1287e-04
Loss = 2.1541e-01, PNorm = 65.1428, GNorm = 1.0658, lr_0 = 5.1663e-04
Loss = 2.0576e-01, PNorm = 65.1934, GNorm = 1.0309, lr_0 = 5.2038e-04
Loss = 2.6544e-01, PNorm = 65.2437, GNorm = 0.9378, lr_0 = 5.2413e-04
Loss = 2.6911e-01, PNorm = 65.3105, GNorm = 1.0253, lr_0 = 5.2788e-04
Loss = 2.2033e-01, PNorm = 65.3662, GNorm = 0.7781, lr_0 = 5.3162e-04
Loss = 2.1621e-01, PNorm = 65.4280, GNorm = 1.0478, lr_0 = 5.3538e-04
Loss = 2.3487e-01, PNorm = 65.4881, GNorm = 1.3006, lr_0 = 5.3912e-04
Loss = 2.2709e-01, PNorm = 65.5485, GNorm = 1.2688, lr_0 = 5.4288e-04
Loss = 2.2607e-01, PNorm = 65.6064, GNorm = 0.9039, lr_0 = 5.4663e-04
Loss = 2.2961e-01, PNorm = 65.6694, GNorm = 0.8027, lr_0 = 5.5038e-04
Validation mae = 0.562653
Epoch 1
Loss = 1.4096e-01, PNorm = 65.7300, GNorm = 1.2578, lr_0 = 5.5413e-04
Loss = 1.6449e-01, PNorm = 65.7897, GNorm = 1.0198, lr_0 = 5.5787e-04
Loss = 1.4979e-01, PNorm = 65.8419, GNorm = 0.7873, lr_0 = 5.6163e-04
Loss = 1.4630e-01, PNorm = 65.8946, GNorm = 1.0244, lr_0 = 5.6538e-04
Loss = 1.4733e-01, PNorm = 65.9425, GNorm = 0.7544, lr_0 = 5.6913e-04
Loss = 1.4404e-01, PNorm = 65.9921, GNorm = 0.6604, lr_0 = 5.7288e-04
Loss = 1.4028e-01, PNorm = 66.0400, GNorm = 1.0363, lr_0 = 5.7662e-04
Loss = 1.5235e-01, PNorm = 66.0817, GNorm = 0.7937, lr_0 = 5.8038e-04
Loss = 1.5827e-01, PNorm = 66.1354, GNorm = 0.8491, lr_0 = 5.8413e-04
Loss = 1.7198e-01, PNorm = 66.1974, GNorm = 1.2283, lr_0 = 5.8788e-04
Loss = 1.4258e-01, PNorm = 66.2592, GNorm = 0.8484, lr_0 = 5.9163e-04
Loss = 1.4469e-01, PNorm = 66.3151, GNorm = 0.7773, lr_0 = 5.9538e-04
Loss = 1.4477e-01, PNorm = 66.3710, GNorm = 0.8797, lr_0 = 5.9913e-04
Loss = 1.2568e-01, PNorm = 66.4268, GNorm = 0.5258, lr_0 = 6.0288e-04
Loss = 1.7262e-01, PNorm = 66.4829, GNorm = 0.9362, lr_0 = 6.0663e-04
Loss = 1.7698e-01, PNorm = 66.5466, GNorm = 0.9787, lr_0 = 6.1038e-04
Loss = 1.3149e-01, PNorm = 66.6078, GNorm = 0.7058, lr_0 = 6.1413e-04
Loss = 1.3595e-01, PNorm = 66.6687, GNorm = 0.6466, lr_0 = 6.1788e-04
Loss = 1.6022e-01, PNorm = 66.7252, GNorm = 0.7043, lr_0 = 6.2163e-04
Loss = 1.3932e-01, PNorm = 66.7920, GNorm = 0.7549, lr_0 = 6.2538e-04
Loss = 1.3242e-01, PNorm = 66.8483, GNorm = 0.7191, lr_0 = 6.2913e-04
Loss = 1.4839e-01, PNorm = 66.9106, GNorm = 1.1294, lr_0 = 6.3288e-04
Loss = 1.6047e-01, PNorm = 66.9789, GNorm = 1.0105, lr_0 = 6.3663e-04
Loss = 1.6707e-01, PNorm = 67.0518, GNorm = 0.6557, lr_0 = 6.4038e-04
Loss = 1.5144e-01, PNorm = 67.1261, GNorm = 0.7597, lr_0 = 6.4413e-04
Loss = 1.6054e-01, PNorm = 67.2001, GNorm = 0.7799, lr_0 = 6.4788e-04
Loss = 1.5970e-01, PNorm = 67.2875, GNorm = 1.0740, lr_0 = 6.5163e-04
Loss = 1.5244e-01, PNorm = 67.3569, GNorm = 0.9575, lr_0 = 6.5538e-04
Loss = 1.7128e-01, PNorm = 67.4392, GNorm = 0.9490, lr_0 = 6.5913e-04
Loss = 1.4753e-01, PNorm = 67.5245, GNorm = 0.7629, lr_0 = 6.6288e-04
Loss = 1.5496e-01, PNorm = 67.6059, GNorm = 1.0272, lr_0 = 6.6663e-04
Loss = 1.5086e-01, PNorm = 67.6920, GNorm = 0.7720, lr_0 = 6.7038e-04
Loss = 1.5819e-01, PNorm = 67.7685, GNorm = 0.7837, lr_0 = 6.7413e-04
Loss = 1.7198e-01, PNorm = 67.8544, GNorm = 0.7519, lr_0 = 6.7788e-04
Loss = 1.5928e-01, PNorm = 67.9431, GNorm = 0.9203, lr_0 = 6.8163e-04
Loss = 1.5197e-01, PNorm = 68.0242, GNorm = 1.1044, lr_0 = 6.8538e-04
Loss = 1.5294e-01, PNorm = 68.1122, GNorm = 0.6698, lr_0 = 6.8913e-04
Loss = 1.5354e-01, PNorm = 68.1886, GNorm = 0.8140, lr_0 = 6.9288e-04
Loss = 1.7990e-01, PNorm = 68.2749, GNorm = 0.6540, lr_0 = 6.9663e-04
Loss = 1.5673e-01, PNorm = 68.3552, GNorm = 1.0013, lr_0 = 7.0038e-04
Loss = 1.5232e-01, PNorm = 68.4291, GNorm = 0.8297, lr_0 = 7.0413e-04
Loss = 1.6607e-01, PNorm = 68.5123, GNorm = 1.4347, lr_0 = 7.0788e-04
Loss = 1.4442e-01, PNorm = 68.6069, GNorm = 0.7463, lr_0 = 7.1163e-04
Loss = 1.9466e-01, PNorm = 68.6977, GNorm = 0.9704, lr_0 = 7.1538e-04
Loss = 1.4523e-01, PNorm = 68.7951, GNorm = 0.7302, lr_0 = 7.1913e-04
Loss = 1.5917e-01, PNorm = 68.8875, GNorm = 0.9252, lr_0 = 7.2288e-04
Loss = 1.6875e-01, PNorm = 68.9727, GNorm = 0.6696, lr_0 = 7.2663e-04
Loss = 1.9190e-01, PNorm = 69.0854, GNorm = 0.9803, lr_0 = 7.3038e-04
Loss = 1.7517e-01, PNorm = 69.1874, GNorm = 0.8288, lr_0 = 7.3413e-04
Loss = 1.7702e-01, PNorm = 69.2965, GNorm = 1.0485, lr_0 = 7.3788e-04
Loss = 1.5813e-01, PNorm = 69.3985, GNorm = 0.9732, lr_0 = 7.4163e-04
Loss = 1.4899e-01, PNorm = 69.4988, GNorm = 0.7504, lr_0 = 7.4538e-04
Loss = 1.5664e-01, PNorm = 69.5869, GNorm = 0.5702, lr_0 = 7.4913e-04
Loss = 1.8495e-01, PNorm = 69.6904, GNorm = 0.8280, lr_0 = 7.5288e-04
Loss = 1.5203e-01, PNorm = 69.7848, GNorm = 0.8396, lr_0 = 7.5663e-04
Loss = 1.6495e-01, PNorm = 69.8740, GNorm = 0.8626, lr_0 = 7.6038e-04
Loss = 1.8194e-01, PNorm = 69.9735, GNorm = 0.9282, lr_0 = 7.6413e-04
Loss = 1.5667e-01, PNorm = 70.0719, GNorm = 0.7135, lr_0 = 7.6788e-04
Loss = 1.6800e-01, PNorm = 70.1689, GNorm = 0.7997, lr_0 = 7.7163e-04
Loss = 1.6204e-01, PNorm = 70.2595, GNorm = 0.7053, lr_0 = 7.7538e-04
Loss = 1.5948e-01, PNorm = 70.3568, GNorm = 0.8584, lr_0 = 7.7913e-04
Loss = 1.8246e-01, PNorm = 70.4552, GNorm = 0.9344, lr_0 = 7.8288e-04
Loss = 1.4699e-01, PNorm = 70.5515, GNorm = 0.9639, lr_0 = 7.8663e-04
Loss = 1.6637e-01, PNorm = 70.6562, GNorm = 0.6880, lr_0 = 7.9038e-04
Loss = 1.7747e-01, PNorm = 70.7536, GNorm = 0.8191, lr_0 = 7.9413e-04
Loss = 1.6067e-01, PNorm = 70.8604, GNorm = 0.6286, lr_0 = 7.9788e-04
Loss = 1.7091e-01, PNorm = 70.9648, GNorm = 0.7215, lr_0 = 8.0163e-04
Loss = 1.7195e-01, PNorm = 71.0761, GNorm = 0.7386, lr_0 = 8.0538e-04
Loss = 1.6939e-01, PNorm = 71.1749, GNorm = 0.7504, lr_0 = 8.0913e-04
Loss = 1.5570e-01, PNorm = 71.2765, GNorm = 0.8000, lr_0 = 8.1288e-04
Loss = 1.7125e-01, PNorm = 71.3741, GNorm = 0.6699, lr_0 = 8.1663e-04
Loss = 1.5632e-01, PNorm = 71.4769, GNorm = 0.7558, lr_0 = 8.2038e-04
Loss = 1.5177e-01, PNorm = 71.5799, GNorm = 0.7544, lr_0 = 8.2413e-04
Loss = 1.5972e-01, PNorm = 71.6721, GNorm = 1.1055, lr_0 = 8.2788e-04
Loss = 1.6418e-01, PNorm = 71.7688, GNorm = 1.1003, lr_0 = 8.3163e-04
Loss = 1.6627e-01, PNorm = 71.8856, GNorm = 0.8188, lr_0 = 8.3538e-04
Loss = 1.8249e-01, PNorm = 71.9812, GNorm = 0.6806, lr_0 = 8.3913e-04
Loss = 1.8596e-01, PNorm = 72.1095, GNorm = 1.1459, lr_0 = 8.4288e-04
Loss = 1.8796e-01, PNorm = 72.2211, GNorm = 0.9946, lr_0 = 8.4663e-04
Loss = 1.7226e-01, PNorm = 72.3504, GNorm = 1.1210, lr_0 = 8.5038e-04
Loss = 1.5920e-01, PNorm = 72.4757, GNorm = 0.5881, lr_0 = 8.5413e-04
Loss = 1.8849e-01, PNorm = 72.6085, GNorm = 0.8009, lr_0 = 8.5788e-04
Loss = 1.7478e-01, PNorm = 72.7324, GNorm = 0.8694, lr_0 = 8.6163e-04
Loss = 1.9291e-01, PNorm = 72.8559, GNorm = 0.8160, lr_0 = 8.6538e-04
Loss = 1.9911e-01, PNorm = 72.9874, GNorm = 0.8992, lr_0 = 8.6913e-04
Loss = 1.7640e-01, PNorm = 73.1237, GNorm = 0.7335, lr_0 = 8.7288e-04
Loss = 1.5954e-01, PNorm = 73.2493, GNorm = 0.6431, lr_0 = 8.7663e-04
Loss = 1.8361e-01, PNorm = 73.3719, GNorm = 0.8783, lr_0 = 8.8038e-04
Loss = 1.6067e-01, PNorm = 73.4828, GNorm = 0.6350, lr_0 = 8.8413e-04
Loss = 1.6612e-01, PNorm = 73.6029, GNorm = 0.6918, lr_0 = 8.8788e-04
Loss = 2.0192e-01, PNorm = 73.7061, GNorm = 0.9782, lr_0 = 8.9163e-04
Loss = 1.6393e-01, PNorm = 73.8261, GNorm = 1.0235, lr_0 = 8.9538e-04
Loss = 1.6943e-01, PNorm = 73.9473, GNorm = 0.8958, lr_0 = 8.9913e-04
Loss = 1.6981e-01, PNorm = 74.0684, GNorm = 0.7064, lr_0 = 9.0288e-04
Loss = 1.8692e-01, PNorm = 74.1927, GNorm = 0.6592, lr_0 = 9.0663e-04
Loss = 1.7998e-01, PNorm = 74.3030, GNorm = 0.7769, lr_0 = 9.1038e-04
Loss = 1.8915e-01, PNorm = 74.4279, GNorm = 1.0025, lr_0 = 9.1413e-04
Loss = 1.4424e-01, PNorm = 74.5619, GNorm = 0.5240, lr_0 = 9.1788e-04
Loss = 1.8535e-01, PNorm = 74.6852, GNorm = 0.8064, lr_0 = 9.2163e-04
Loss = 2.0903e-01, PNorm = 74.8215, GNorm = 0.7205, lr_0 = 9.2538e-04
Loss = 1.7479e-01, PNorm = 74.9792, GNorm = 1.3904, lr_0 = 9.2913e-04
Loss = 1.7188e-01, PNorm = 75.1077, GNorm = 0.6218, lr_0 = 9.3288e-04
Loss = 1.8094e-01, PNorm = 75.2438, GNorm = 0.8967, lr_0 = 9.3663e-04
Loss = 1.8567e-01, PNorm = 75.3795, GNorm = 0.9374, lr_0 = 9.4038e-04
Loss = 1.9305e-01, PNorm = 75.5189, GNorm = 0.7997, lr_0 = 9.4413e-04
Loss = 1.9462e-01, PNorm = 75.6737, GNorm = 0.6723, lr_0 = 9.4788e-04
Loss = 1.8916e-01, PNorm = 75.8142, GNorm = 0.7397, lr_0 = 9.5163e-04
Loss = 1.5308e-01, PNorm = 75.9507, GNorm = 0.6290, lr_0 = 9.5538e-04
Loss = 1.7448e-01, PNorm = 76.0723, GNorm = 0.6683, lr_0 = 9.5913e-04
Loss = 1.7869e-01, PNorm = 76.2117, GNorm = 0.6699, lr_0 = 9.6288e-04
Loss = 1.6992e-01, PNorm = 76.3382, GNorm = 1.0940, lr_0 = 9.6663e-04
Loss = 1.9755e-01, PNorm = 76.4819, GNorm = 0.8243, lr_0 = 9.7038e-04
Loss = 1.7913e-01, PNorm = 76.6087, GNorm = 0.6045, lr_0 = 9.7413e-04
Loss = 1.6099e-01, PNorm = 76.7479, GNorm = 0.8449, lr_0 = 9.7788e-04
Loss = 1.9687e-01, PNorm = 76.8878, GNorm = 1.0606, lr_0 = 9.8163e-04
Loss = 1.9914e-01, PNorm = 77.0280, GNorm = 0.6240, lr_0 = 9.8537e-04
Loss = 1.7763e-01, PNorm = 77.1738, GNorm = 0.5770, lr_0 = 9.8912e-04
Loss = 1.6908e-01, PNorm = 77.3146, GNorm = 0.8691, lr_0 = 9.9288e-04
Loss = 1.7482e-01, PNorm = 77.4554, GNorm = 1.1829, lr_0 = 9.9663e-04
Loss = 1.8849e-01, PNorm = 77.5861, GNorm = 0.7625, lr_0 = 9.9993e-04
Validation mae = 0.539936
Epoch 2
Loss = 1.2420e-01, PNorm = 77.7077, GNorm = 0.4588, lr_0 = 9.9925e-04
Loss = 1.3177e-01, PNorm = 77.8412, GNorm = 0.5685, lr_0 = 9.9856e-04
Loss = 1.0013e-01, PNorm = 77.9432, GNorm = 0.5557, lr_0 = 9.9788e-04
Loss = 1.0198e-01, PNorm = 78.0480, GNorm = 0.5235, lr_0 = 9.9719e-04
Loss = 1.0983e-01, PNorm = 78.1458, GNorm = 0.4617, lr_0 = 9.9651e-04
Loss = 1.0359e-01, PNorm = 78.2474, GNorm = 0.6910, lr_0 = 9.9583e-04
Loss = 1.2563e-01, PNorm = 78.3480, GNorm = 0.6849, lr_0 = 9.9515e-04
Loss = 1.0457e-01, PNorm = 78.4764, GNorm = 0.5197, lr_0 = 9.9446e-04
Loss = 1.1272e-01, PNorm = 78.5873, GNorm = 0.6487, lr_0 = 9.9378e-04
Loss = 1.0652e-01, PNorm = 78.6940, GNorm = 0.6727, lr_0 = 9.9310e-04
Loss = 1.0972e-01, PNorm = 78.8028, GNorm = 0.5959, lr_0 = 9.9242e-04
Loss = 9.9245e-02, PNorm = 78.9080, GNorm = 0.6478, lr_0 = 9.9174e-04
Loss = 1.1382e-01, PNorm = 79.0180, GNorm = 0.8459, lr_0 = 9.9106e-04
Loss = 1.0386e-01, PNorm = 79.1204, GNorm = 0.5798, lr_0 = 9.9038e-04
Loss = 1.1905e-01, PNorm = 79.2230, GNorm = 0.7596, lr_0 = 9.8971e-04
Loss = 8.9420e-02, PNorm = 79.3232, GNorm = 0.5130, lr_0 = 9.8903e-04
Loss = 1.2897e-01, PNorm = 79.4051, GNorm = 0.7646, lr_0 = 9.8835e-04
Loss = 1.1632e-01, PNorm = 79.5151, GNorm = 0.5716, lr_0 = 9.8767e-04
Loss = 9.3840e-02, PNorm = 79.6093, GNorm = 0.4879, lr_0 = 9.8700e-04
Loss = 9.0782e-02, PNorm = 79.7102, GNorm = 0.5474, lr_0 = 9.8632e-04
Loss = 1.0041e-01, PNorm = 79.7986, GNorm = 0.4557, lr_0 = 9.8564e-04
Loss = 9.7195e-02, PNorm = 79.8814, GNorm = 1.1030, lr_0 = 9.8497e-04
Loss = 1.0044e-01, PNorm = 79.9670, GNorm = 0.4621, lr_0 = 9.8429e-04
Loss = 9.8238e-02, PNorm = 80.0738, GNorm = 0.8035, lr_0 = 9.8362e-04
Loss = 9.5903e-02, PNorm = 80.1555, GNorm = 0.4933, lr_0 = 9.8295e-04
Loss = 1.1875e-01, PNorm = 80.2641, GNorm = 0.6112, lr_0 = 9.8227e-04
Loss = 1.1318e-01, PNorm = 80.3624, GNorm = 0.5916, lr_0 = 9.8160e-04
Loss = 1.1465e-01, PNorm = 80.4722, GNorm = 0.4649, lr_0 = 9.8093e-04
Loss = 1.1337e-01, PNorm = 80.5726, GNorm = 0.6792, lr_0 = 9.8026e-04
Loss = 1.0287e-01, PNorm = 80.6725, GNorm = 0.5911, lr_0 = 9.7958e-04
Loss = 1.1110e-01, PNorm = 80.7745, GNorm = 0.5763, lr_0 = 9.7891e-04
Loss = 1.0167e-01, PNorm = 80.8725, GNorm = 0.6842, lr_0 = 9.7824e-04
Loss = 1.0043e-01, PNorm = 80.9801, GNorm = 1.0841, lr_0 = 9.7757e-04
Loss = 1.0970e-01, PNorm = 81.0742, GNorm = 0.4451, lr_0 = 9.7690e-04
Loss = 1.1733e-01, PNorm = 81.1704, GNorm = 0.7853, lr_0 = 9.7623e-04
Loss = 1.1683e-01, PNorm = 81.2801, GNorm = 0.5264, lr_0 = 9.7556e-04
Loss = 1.0066e-01, PNorm = 81.3801, GNorm = 0.6855, lr_0 = 9.7490e-04
Loss = 1.0080e-01, PNorm = 81.4826, GNorm = 0.5297, lr_0 = 9.7423e-04
Loss = 1.1036e-01, PNorm = 81.5840, GNorm = 0.4456, lr_0 = 9.7356e-04
Loss = 1.0051e-01, PNorm = 81.6931, GNorm = 0.5380, lr_0 = 9.7289e-04
Loss = 1.2493e-01, PNorm = 81.7955, GNorm = 0.7533, lr_0 = 9.7223e-04
Loss = 1.2048e-01, PNorm = 81.9038, GNorm = 0.7234, lr_0 = 9.7156e-04
Loss = 1.1497e-01, PNorm = 82.0210, GNorm = 0.7801, lr_0 = 9.7090e-04
Loss = 1.1537e-01, PNorm = 82.1234, GNorm = 1.1476, lr_0 = 9.7023e-04
Loss = 1.2343e-01, PNorm = 82.2420, GNorm = 0.8448, lr_0 = 9.6957e-04
Loss = 1.1428e-01, PNorm = 82.3657, GNorm = 0.7655, lr_0 = 9.6890e-04
Loss = 1.1712e-01, PNorm = 82.4775, GNorm = 0.4628, lr_0 = 9.6824e-04
Loss = 1.1791e-01, PNorm = 82.6028, GNorm = 0.5630, lr_0 = 9.6757e-04
Loss = 9.8796e-02, PNorm = 82.7130, GNorm = 0.3925, lr_0 = 9.6691e-04
Loss = 1.2076e-01, PNorm = 82.8213, GNorm = 0.7154, lr_0 = 9.6625e-04
Loss = 1.1950e-01, PNorm = 82.9266, GNorm = 0.4921, lr_0 = 9.6559e-04
Loss = 1.1080e-01, PNorm = 83.0436, GNorm = 0.4902, lr_0 = 9.6493e-04
Loss = 1.1511e-01, PNorm = 83.1652, GNorm = 0.7395, lr_0 = 9.6427e-04
Loss = 1.1517e-01, PNorm = 83.2797, GNorm = 0.6020, lr_0 = 9.6360e-04
Loss = 1.0245e-01, PNorm = 83.3834, GNorm = 0.6747, lr_0 = 9.6294e-04
Loss = 1.0829e-01, PNorm = 83.4902, GNorm = 0.7833, lr_0 = 9.6228e-04
Loss = 1.2192e-01, PNorm = 83.6073, GNorm = 0.8618, lr_0 = 9.6163e-04
Loss = 1.2246e-01, PNorm = 83.7204, GNorm = 0.4674, lr_0 = 9.6097e-04
Loss = 1.1597e-01, PNorm = 83.8281, GNorm = 0.6878, lr_0 = 9.6031e-04
Loss = 9.5967e-02, PNorm = 83.9402, GNorm = 0.6114, lr_0 = 9.5965e-04
Loss = 1.1867e-01, PNorm = 84.0523, GNorm = 0.8803, lr_0 = 9.5899e-04
Loss = 1.0490e-01, PNorm = 84.1511, GNorm = 0.6769, lr_0 = 9.5834e-04
Loss = 1.1271e-01, PNorm = 84.2574, GNorm = 1.6875, lr_0 = 9.5768e-04
Loss = 1.2402e-01, PNorm = 84.3627, GNorm = 0.7483, lr_0 = 9.5702e-04
Loss = 1.1242e-01, PNorm = 84.4786, GNorm = 0.4821, lr_0 = 9.5637e-04
Loss = 1.1396e-01, PNorm = 84.5935, GNorm = 0.5596, lr_0 = 9.5571e-04
Loss = 1.2918e-01, PNorm = 84.7023, GNorm = 0.5892, lr_0 = 9.5506e-04
Loss = 1.1053e-01, PNorm = 84.8083, GNorm = 0.6394, lr_0 = 9.5440e-04
Loss = 1.0111e-01, PNorm = 84.9209, GNorm = 1.0104, lr_0 = 9.5375e-04
Loss = 1.2152e-01, PNorm = 85.0275, GNorm = 0.4284, lr_0 = 9.5310e-04
Loss = 1.0792e-01, PNorm = 85.1318, GNorm = 0.5157, lr_0 = 9.5244e-04
Loss = 1.1825e-01, PNorm = 85.2435, GNorm = 0.4740, lr_0 = 9.5179e-04
Loss = 1.0643e-01, PNorm = 85.3458, GNorm = 0.4844, lr_0 = 9.5114e-04
Loss = 1.0150e-01, PNorm = 85.4475, GNorm = 0.7571, lr_0 = 9.5049e-04
Loss = 1.2627e-01, PNorm = 85.5585, GNorm = 0.7670, lr_0 = 9.4984e-04
Loss = 1.0971e-01, PNorm = 85.6741, GNorm = 0.4599, lr_0 = 9.4919e-04
Loss = 1.1243e-01, PNorm = 85.7834, GNorm = 0.5138, lr_0 = 9.4854e-04
Loss = 1.1126e-01, PNorm = 85.9030, GNorm = 1.6161, lr_0 = 9.4789e-04
Loss = 1.1297e-01, PNorm = 86.0078, GNorm = 0.4808, lr_0 = 9.4724e-04
Loss = 1.1903e-01, PNorm = 86.1183, GNorm = 0.5028, lr_0 = 9.4659e-04
Loss = 1.1305e-01, PNorm = 86.2369, GNorm = 0.5514, lr_0 = 9.4594e-04
Loss = 1.1570e-01, PNorm = 86.3498, GNorm = 1.0864, lr_0 = 9.4529e-04
Loss = 9.7265e-02, PNorm = 86.4612, GNorm = 0.5068, lr_0 = 9.4464e-04
Loss = 1.0663e-01, PNorm = 86.5696, GNorm = 0.4855, lr_0 = 9.4400e-04
Loss = 1.2465e-01, PNorm = 86.6708, GNorm = 1.0776, lr_0 = 9.4335e-04
Loss = 1.2430e-01, PNorm = 86.7772, GNorm = 0.7012, lr_0 = 9.4270e-04
Loss = 1.0944e-01, PNorm = 86.8871, GNorm = 0.8372, lr_0 = 9.4206e-04
Loss = 1.0569e-01, PNorm = 86.9796, GNorm = 0.5947, lr_0 = 9.4141e-04
Loss = 1.2269e-01, PNorm = 87.0689, GNorm = 0.8439, lr_0 = 9.4077e-04
Loss = 1.0667e-01, PNorm = 87.1680, GNorm = 0.8048, lr_0 = 9.4012e-04
Loss = 1.0287e-01, PNorm = 87.2600, GNorm = 0.4656, lr_0 = 9.3948e-04
Loss = 1.2405e-01, PNorm = 87.3558, GNorm = 0.8150, lr_0 = 9.3884e-04
Loss = 1.1367e-01, PNorm = 87.4638, GNorm = 1.1567, lr_0 = 9.3819e-04
Loss = 1.0517e-01, PNorm = 87.5692, GNorm = 0.7224, lr_0 = 9.3755e-04
Loss = 1.2125e-01, PNorm = 87.6774, GNorm = 0.8318, lr_0 = 9.3691e-04
Loss = 1.2831e-01, PNorm = 87.7899, GNorm = 0.6381, lr_0 = 9.3627e-04
Loss = 1.2014e-01, PNorm = 87.9082, GNorm = 1.0585, lr_0 = 9.3562e-04
Loss = 1.2284e-01, PNorm = 88.0334, GNorm = 0.8695, lr_0 = 9.3498e-04
Loss = 1.1889e-01, PNorm = 88.1582, GNorm = 0.4788, lr_0 = 9.3434e-04
Loss = 1.0713e-01, PNorm = 88.2794, GNorm = 0.5677, lr_0 = 9.3370e-04
Loss = 1.0887e-01, PNorm = 88.3841, GNorm = 0.9789, lr_0 = 9.3306e-04
Loss = 1.1494e-01, PNorm = 88.4902, GNorm = 0.8916, lr_0 = 9.3242e-04
Loss = 1.1622e-01, PNorm = 88.5954, GNorm = 0.4016, lr_0 = 9.3178e-04
Loss = 1.0707e-01, PNorm = 88.7013, GNorm = 0.8592, lr_0 = 9.3115e-04
Loss = 1.0234e-01, PNorm = 88.7988, GNorm = 1.0472, lr_0 = 9.3051e-04
Loss = 1.1841e-01, PNorm = 88.8944, GNorm = 0.7272, lr_0 = 9.2987e-04
Loss = 1.0790e-01, PNorm = 88.9937, GNorm = 0.5333, lr_0 = 9.2923e-04
Loss = 1.3337e-01, PNorm = 89.0850, GNorm = 0.7227, lr_0 = 9.2860e-04
Loss = 1.2892e-01, PNorm = 89.1927, GNorm = 0.6172, lr_0 = 9.2796e-04
Loss = 1.1959e-01, PNorm = 89.3030, GNorm = 0.6142, lr_0 = 9.2733e-04
Loss = 1.2387e-01, PNorm = 89.4146, GNorm = 0.5962, lr_0 = 9.2669e-04
Loss = 1.2757e-01, PNorm = 89.5207, GNorm = 0.7632, lr_0 = 9.2606e-04
Loss = 1.1169e-01, PNorm = 89.6301, GNorm = 0.6414, lr_0 = 9.2542e-04
Loss = 1.0281e-01, PNorm = 89.7352, GNorm = 0.7815, lr_0 = 9.2479e-04
Loss = 1.1616e-01, PNorm = 89.8217, GNorm = 0.5690, lr_0 = 9.2415e-04
Loss = 1.3730e-01, PNorm = 89.9390, GNorm = 0.7158, lr_0 = 9.2352e-04
Loss = 1.3137e-01, PNorm = 90.0581, GNorm = 0.9524, lr_0 = 9.2289e-04
Loss = 1.1701e-01, PNorm = 90.1710, GNorm = 0.7010, lr_0 = 9.2226e-04
Loss = 1.0671e-01, PNorm = 90.2723, GNorm = 1.0579, lr_0 = 9.2162e-04
Loss = 1.2956e-01, PNorm = 90.3691, GNorm = 0.7410, lr_0 = 9.2099e-04
Validation mae = 0.518257
Epoch 3
Loss = 8.1551e-02, PNorm = 90.4608, GNorm = 0.4578, lr_0 = 9.2036e-04
Loss = 6.9000e-02, PNorm = 90.5385, GNorm = 0.5324, lr_0 = 9.1973e-04
Loss = 7.7296e-02, PNorm = 90.6025, GNorm = 0.4680, lr_0 = 9.1910e-04
Loss = 6.8667e-02, PNorm = 90.6673, GNorm = 0.5153, lr_0 = 9.1847e-04
Loss = 7.3908e-02, PNorm = 90.7359, GNorm = 0.5748, lr_0 = 9.1784e-04
Loss = 6.5575e-02, PNorm = 90.8118, GNorm = 0.3127, lr_0 = 9.1721e-04
Loss = 6.3086e-02, PNorm = 90.8828, GNorm = 0.5684, lr_0 = 9.1658e-04
Loss = 6.1628e-02, PNorm = 90.9416, GNorm = 0.7852, lr_0 = 9.1596e-04
Loss = 6.4909e-02, PNorm = 91.0117, GNorm = 0.3925, lr_0 = 9.1533e-04
Loss = 6.9753e-02, PNorm = 91.0740, GNorm = 0.6498, lr_0 = 9.1470e-04
Loss = 6.7208e-02, PNorm = 91.1356, GNorm = 0.4312, lr_0 = 9.1408e-04
Loss = 6.1548e-02, PNorm = 91.2020, GNorm = 0.4497, lr_0 = 9.1345e-04
Loss = 6.6623e-02, PNorm = 91.2716, GNorm = 0.4944, lr_0 = 9.1282e-04
Loss = 7.4127e-02, PNorm = 91.3347, GNorm = 0.5951, lr_0 = 9.1220e-04
Loss = 6.2337e-02, PNorm = 91.4058, GNorm = 0.3265, lr_0 = 9.1157e-04
Loss = 7.4636e-02, PNorm = 91.4802, GNorm = 0.4383, lr_0 = 9.1095e-04
Loss = 6.2415e-02, PNorm = 91.5435, GNorm = 0.5154, lr_0 = 9.1032e-04
Loss = 6.6266e-02, PNorm = 91.6094, GNorm = 0.2880, lr_0 = 9.0970e-04
Loss = 6.7176e-02, PNorm = 91.6728, GNorm = 0.5605, lr_0 = 9.0908e-04
Loss = 6.3969e-02, PNorm = 91.7416, GNorm = 0.4602, lr_0 = 9.0846e-04
Loss = 6.4905e-02, PNorm = 91.8097, GNorm = 0.5491, lr_0 = 9.0783e-04
Loss = 6.1669e-02, PNorm = 91.8735, GNorm = 0.3814, lr_0 = 9.0721e-04
Loss = 7.1796e-02, PNorm = 91.9422, GNorm = 0.5876, lr_0 = 9.0659e-04
Loss = 6.9081e-02, PNorm = 92.0090, GNorm = 0.4238, lr_0 = 9.0597e-04
Loss = 6.3305e-02, PNorm = 92.0810, GNorm = 0.6900, lr_0 = 9.0535e-04
Loss = 7.3605e-02, PNorm = 92.1419, GNorm = 0.4344, lr_0 = 9.0473e-04
Loss = 6.9540e-02, PNorm = 92.2087, GNorm = 0.3595, lr_0 = 9.0411e-04
Loss = 6.9663e-02, PNorm = 92.2823, GNorm = 0.4684, lr_0 = 9.0349e-04
Loss = 5.8876e-02, PNorm = 92.3570, GNorm = 0.6140, lr_0 = 9.0287e-04
Loss = 6.6699e-02, PNorm = 92.4208, GNorm = 0.5206, lr_0 = 9.0225e-04
Loss = 8.0694e-02, PNorm = 92.4946, GNorm = 0.6094, lr_0 = 9.0163e-04
Loss = 6.7648e-02, PNorm = 92.5641, GNorm = 0.6011, lr_0 = 9.0102e-04
Loss = 6.3514e-02, PNorm = 92.6330, GNorm = 0.6053, lr_0 = 9.0040e-04
Loss = 6.8795e-02, PNorm = 92.7057, GNorm = 0.8948, lr_0 = 8.9978e-04
Loss = 5.8249e-02, PNorm = 92.7828, GNorm = 0.4164, lr_0 = 8.9916e-04
Loss = 6.0422e-02, PNorm = 92.8507, GNorm = 0.5384, lr_0 = 8.9855e-04
Loss = 6.4829e-02, PNorm = 92.9213, GNorm = 0.6254, lr_0 = 8.9793e-04
Loss = 7.0412e-02, PNorm = 92.9825, GNorm = 0.5629, lr_0 = 8.9732e-04
Loss = 6.9696e-02, PNorm = 93.0459, GNorm = 0.3888, lr_0 = 8.9670e-04
Loss = 6.8527e-02, PNorm = 93.1143, GNorm = 0.7887, lr_0 = 8.9609e-04
Loss = 6.6344e-02, PNorm = 93.1880, GNorm = 0.6261, lr_0 = 8.9548e-04
Loss = 7.6151e-02, PNorm = 93.2724, GNorm = 0.3611, lr_0 = 8.9486e-04
Loss = 7.1266e-02, PNorm = 93.3536, GNorm = 0.5011, lr_0 = 8.9425e-04
Loss = 6.1351e-02, PNorm = 93.4387, GNorm = 0.9863, lr_0 = 8.9364e-04
Loss = 6.9373e-02, PNorm = 93.5138, GNorm = 0.3082, lr_0 = 8.9302e-04
Loss = 6.0418e-02, PNorm = 93.5891, GNorm = 0.5650, lr_0 = 8.9241e-04
Loss = 6.8714e-02, PNorm = 93.6661, GNorm = 0.4989, lr_0 = 8.9180e-04
Loss = 7.3510e-02, PNorm = 93.7424, GNorm = 0.4948, lr_0 = 8.9119e-04
Loss = 6.5479e-02, PNorm = 93.8215, GNorm = 0.3205, lr_0 = 8.9058e-04
Loss = 6.7530e-02, PNorm = 93.9075, GNorm = 0.5821, lr_0 = 8.8997e-04
Loss = 6.5828e-02, PNorm = 93.9831, GNorm = 0.4381, lr_0 = 8.8936e-04
Loss = 6.6028e-02, PNorm = 94.0569, GNorm = 0.6482, lr_0 = 8.8875e-04
Loss = 7.0023e-02, PNorm = 94.1324, GNorm = 0.6363, lr_0 = 8.8814e-04
Loss = 7.6253e-02, PNorm = 94.2116, GNorm = 0.7396, lr_0 = 8.8753e-04
Loss = 7.6310e-02, PNorm = 94.2893, GNorm = 0.4384, lr_0 = 8.8693e-04
Loss = 7.3786e-02, PNorm = 94.3728, GNorm = 0.8352, lr_0 = 8.8632e-04
Loss = 6.6789e-02, PNorm = 94.4443, GNorm = 0.5114, lr_0 = 8.8571e-04
Loss = 6.8159e-02, PNorm = 94.5308, GNorm = 0.4337, lr_0 = 8.8510e-04
Loss = 6.6277e-02, PNorm = 94.6243, GNorm = 0.7404, lr_0 = 8.8450e-04
Loss = 6.7984e-02, PNorm = 94.7065, GNorm = 0.4259, lr_0 = 8.8389e-04
Loss = 7.5317e-02, PNorm = 94.7905, GNorm = 0.5669, lr_0 = 8.8329e-04
Loss = 8.1022e-02, PNorm = 94.8799, GNorm = 1.1009, lr_0 = 8.8268e-04
Loss = 7.0896e-02, PNorm = 94.9655, GNorm = 0.5113, lr_0 = 8.8208e-04
Loss = 6.5402e-02, PNorm = 95.0489, GNorm = 0.3105, lr_0 = 8.8147e-04
Loss = 6.0475e-02, PNorm = 95.1214, GNorm = 0.2475, lr_0 = 8.8087e-04
Loss = 9.0546e-02, PNorm = 95.1983, GNorm = 0.8058, lr_0 = 8.8026e-04
Loss = 6.5982e-02, PNorm = 95.2826, GNorm = 0.4041, lr_0 = 8.7966e-04
Loss = 6.7962e-02, PNorm = 95.3581, GNorm = 0.5824, lr_0 = 8.7906e-04
Loss = 7.2363e-02, PNorm = 95.4414, GNorm = 0.5333, lr_0 = 8.7846e-04
Loss = 7.1952e-02, PNorm = 95.5247, GNorm = 0.5293, lr_0 = 8.7785e-04
Loss = 7.7406e-02, PNorm = 95.6097, GNorm = 0.3983, lr_0 = 8.7725e-04
Loss = 6.9398e-02, PNorm = 95.6901, GNorm = 0.7212, lr_0 = 8.7665e-04
Loss = 6.4032e-02, PNorm = 95.7690, GNorm = 0.4710, lr_0 = 8.7605e-04
Loss = 6.7269e-02, PNorm = 95.8389, GNorm = 0.7643, lr_0 = 8.7545e-04
Loss = 6.8904e-02, PNorm = 95.9177, GNorm = 0.5449, lr_0 = 8.7485e-04
Loss = 6.0006e-02, PNorm = 95.9824, GNorm = 0.4641, lr_0 = 8.7425e-04
Loss = 5.7839e-02, PNorm = 96.0602, GNorm = 0.4307, lr_0 = 8.7365e-04
Loss = 6.6017e-02, PNorm = 96.1365, GNorm = 0.3480, lr_0 = 8.7306e-04
Loss = 6.4187e-02, PNorm = 96.2073, GNorm = 0.5971, lr_0 = 8.7246e-04
Loss = 7.8500e-02, PNorm = 96.2819, GNorm = 0.3686, lr_0 = 8.7186e-04
Loss = 7.6118e-02, PNorm = 96.3620, GNorm = 0.4591, lr_0 = 8.7126e-04
Loss = 8.1866e-02, PNorm = 96.4407, GNorm = 0.7506, lr_0 = 8.7067e-04
Loss = 8.3497e-02, PNorm = 96.5320, GNorm = 0.6051, lr_0 = 8.7007e-04
Loss = 7.8822e-02, PNorm = 96.6252, GNorm = 0.9973, lr_0 = 8.6947e-04
Loss = 6.7231e-02, PNorm = 96.7107, GNorm = 0.3220, lr_0 = 8.6888e-04
Loss = 7.7416e-02, PNorm = 96.7981, GNorm = 0.4235, lr_0 = 8.6828e-04
Loss = 7.3927e-02, PNorm = 96.8961, GNorm = 0.7142, lr_0 = 8.6769e-04
Loss = 8.0029e-02, PNorm = 96.9983, GNorm = 0.5632, lr_0 = 8.6709e-04
Loss = 6.1827e-02, PNorm = 97.1003, GNorm = 0.7510, lr_0 = 8.6650e-04
Loss = 7.7261e-02, PNorm = 97.1958, GNorm = 0.7646, lr_0 = 8.6590e-04
Loss = 6.7331e-02, PNorm = 97.2830, GNorm = 0.4607, lr_0 = 8.6531e-04
Loss = 7.7629e-02, PNorm = 97.3690, GNorm = 0.7051, lr_0 = 8.6472e-04
Loss = 6.8428e-02, PNorm = 97.4529, GNorm = 0.5709, lr_0 = 8.6413e-04
Loss = 7.2652e-02, PNorm = 97.5318, GNorm = 0.4339, lr_0 = 8.6353e-04
Loss = 6.9813e-02, PNorm = 97.6088, GNorm = 0.5452, lr_0 = 8.6294e-04
Loss = 8.4531e-02, PNorm = 97.6932, GNorm = 0.4221, lr_0 = 8.6235e-04
Loss = 7.4072e-02, PNorm = 97.7798, GNorm = 0.9629, lr_0 = 8.6176e-04
Loss = 7.7581e-02, PNorm = 97.8635, GNorm = 0.5180, lr_0 = 8.6117e-04
Loss = 7.6796e-02, PNorm = 97.9539, GNorm = 0.4111, lr_0 = 8.6058e-04
Loss = 8.1497e-02, PNorm = 98.0445, GNorm = 0.3744, lr_0 = 8.5999e-04
Loss = 8.1313e-02, PNorm = 98.1309, GNorm = 0.4124, lr_0 = 8.5940e-04
Loss = 7.3223e-02, PNorm = 98.2189, GNorm = 0.5616, lr_0 = 8.5881e-04
Loss = 7.9653e-02, PNorm = 98.3033, GNorm = 0.5376, lr_0 = 8.5823e-04
Loss = 6.8153e-02, PNorm = 98.3890, GNorm = 0.5062, lr_0 = 8.5764e-04
Loss = 6.7932e-02, PNorm = 98.4771, GNorm = 0.6260, lr_0 = 8.5705e-04
Loss = 7.1201e-02, PNorm = 98.5553, GNorm = 0.6307, lr_0 = 8.5646e-04
Loss = 7.9197e-02, PNorm = 98.6350, GNorm = 0.4502, lr_0 = 8.5588e-04
Loss = 8.3676e-02, PNorm = 98.7201, GNorm = 0.3477, lr_0 = 8.5529e-04
Loss = 8.0102e-02, PNorm = 98.8158, GNorm = 0.2888, lr_0 = 8.5470e-04
Loss = 7.1356e-02, PNorm = 98.9139, GNorm = 0.3591, lr_0 = 8.5412e-04
Loss = 6.6392e-02, PNorm = 99.0039, GNorm = 0.5799, lr_0 = 8.5353e-04
Loss = 7.7934e-02, PNorm = 99.0857, GNorm = 0.5508, lr_0 = 8.5295e-04
Loss = 8.2790e-02, PNorm = 99.1851, GNorm = 0.6860, lr_0 = 8.5236e-04
Loss = 7.2061e-02, PNorm = 99.2759, GNorm = 0.4871, lr_0 = 8.5178e-04
Loss = 8.7307e-02, PNorm = 99.3681, GNorm = 1.1953, lr_0 = 8.5120e-04
Loss = 1.1112e-01, PNorm = 99.4662, GNorm = 0.8820, lr_0 = 8.5061e-04
Loss = 8.8711e-02, PNorm = 99.5834, GNorm = 0.8629, lr_0 = 8.5003e-04
Loss = 9.6943e-02, PNorm = 99.6918, GNorm = 0.5390, lr_0 = 8.4945e-04
Loss = 8.6623e-02, PNorm = 99.8009, GNorm = 0.5429, lr_0 = 8.4887e-04
Loss = 8.3363e-02, PNorm = 99.9007, GNorm = 0.3555, lr_0 = 8.4828e-04
Validation mae = 0.502216
Epoch 4
Loss = 4.8929e-02, PNorm = 99.9931, GNorm = 0.4436, lr_0 = 8.4770e-04
Loss = 5.3696e-02, PNorm = 100.0579, GNorm = 0.3059, lr_0 = 8.4712e-04
Loss = 5.6154e-02, PNorm = 100.1208, GNorm = 0.5851, lr_0 = 8.4654e-04
Loss = 4.7699e-02, PNorm = 100.1795, GNorm = 0.2619, lr_0 = 8.4596e-04
Loss = 5.8380e-02, PNorm = 100.2428, GNorm = 0.3983, lr_0 = 8.4538e-04
Loss = 4.8411e-02, PNorm = 100.3025, GNorm = 0.3998, lr_0 = 8.4480e-04
Loss = 4.7644e-02, PNorm = 100.3536, GNorm = 0.6112, lr_0 = 8.4423e-04
Loss = 5.5632e-02, PNorm = 100.4220, GNorm = 0.6272, lr_0 = 8.4365e-04
Loss = 5.1558e-02, PNorm = 100.4855, GNorm = 0.9317, lr_0 = 8.4307e-04
Loss = 4.5724e-02, PNorm = 100.5441, GNorm = 0.5190, lr_0 = 8.4249e-04
Loss = 4.8549e-02, PNorm = 100.6015, GNorm = 0.3532, lr_0 = 8.4191e-04
Loss = 4.6808e-02, PNorm = 100.6462, GNorm = 0.3404, lr_0 = 8.4134e-04
Loss = 4.3797e-02, PNorm = 100.6987, GNorm = 0.2963, lr_0 = 8.4076e-04
Loss = 3.8906e-02, PNorm = 100.7504, GNorm = 0.2966, lr_0 = 8.4019e-04
Loss = 5.1056e-02, PNorm = 100.8024, GNorm = 0.3746, lr_0 = 8.3961e-04
Loss = 5.1926e-02, PNorm = 100.8521, GNorm = 0.3623, lr_0 = 8.3903e-04
Loss = 4.5438e-02, PNorm = 100.9184, GNorm = 1.1951, lr_0 = 8.3846e-04
Loss = 4.7118e-02, PNorm = 100.9743, GNorm = 0.4791, lr_0 = 8.3789e-04
Loss = 5.2929e-02, PNorm = 101.0391, GNorm = 0.4500, lr_0 = 8.3731e-04
Loss = 5.0365e-02, PNorm = 101.0986, GNorm = 0.5470, lr_0 = 8.3674e-04
Loss = 4.4060e-02, PNorm = 101.1621, GNorm = 1.1209, lr_0 = 8.3616e-04
Loss = 5.0334e-02, PNorm = 101.2112, GNorm = 0.3690, lr_0 = 8.3559e-04
Loss = 5.4599e-02, PNorm = 101.2668, GNorm = 0.5689, lr_0 = 8.3502e-04
Loss = 4.3697e-02, PNorm = 101.3252, GNorm = 0.6370, lr_0 = 8.3445e-04
Loss = 4.4394e-02, PNorm = 101.3863, GNorm = 0.6677, lr_0 = 8.3388e-04
Loss = 4.9222e-02, PNorm = 101.4436, GNorm = 0.2994, lr_0 = 8.3330e-04
Loss = 4.9039e-02, PNorm = 101.4960, GNorm = 0.3752, lr_0 = 8.3273e-04
Loss = 4.7738e-02, PNorm = 101.5610, GNorm = 0.5725, lr_0 = 8.3216e-04
Loss = 4.4500e-02, PNorm = 101.6201, GNorm = 0.2932, lr_0 = 8.3159e-04
Loss = 4.9401e-02, PNorm = 101.6855, GNorm = 0.4831, lr_0 = 8.3102e-04
Loss = 4.9874e-02, PNorm = 101.7438, GNorm = 0.6408, lr_0 = 8.3045e-04
Loss = 5.3973e-02, PNorm = 101.8075, GNorm = 0.9939, lr_0 = 8.2988e-04
Loss = 4.8237e-02, PNorm = 101.8712, GNorm = 0.4663, lr_0 = 8.2932e-04
Loss = 4.0880e-02, PNorm = 101.9337, GNorm = 0.7384, lr_0 = 8.2875e-04
Loss = 5.0783e-02, PNorm = 102.0028, GNorm = 0.5836, lr_0 = 8.2818e-04
Loss = 4.3451e-02, PNorm = 102.0647, GNorm = 0.3628, lr_0 = 8.2761e-04
Loss = 4.5723e-02, PNorm = 102.1211, GNorm = 0.4541, lr_0 = 8.2705e-04
Loss = 5.4520e-02, PNorm = 102.1764, GNorm = 0.6573, lr_0 = 8.2648e-04
Loss = 4.8853e-02, PNorm = 102.2393, GNorm = 0.7385, lr_0 = 8.2591e-04
Loss = 5.1798e-02, PNorm = 102.3003, GNorm = 0.4456, lr_0 = 8.2535e-04
Loss = 4.7519e-02, PNorm = 102.3679, GNorm = 0.6918, lr_0 = 8.2478e-04
Loss = 4.3121e-02, PNorm = 102.4437, GNorm = 0.2958, lr_0 = 8.2422e-04
Loss = 5.5604e-02, PNorm = 102.5183, GNorm = 0.3156, lr_0 = 8.2365e-04
Loss = 4.2953e-02, PNorm = 102.5903, GNorm = 0.2408, lr_0 = 8.2309e-04
Loss = 4.2487e-02, PNorm = 102.6566, GNorm = 0.4568, lr_0 = 8.2252e-04
Loss = 4.5609e-02, PNorm = 102.7132, GNorm = 0.3008, lr_0 = 8.2196e-04
Loss = 4.9629e-02, PNorm = 102.7776, GNorm = 0.7901, lr_0 = 8.2140e-04
Loss = 5.2436e-02, PNorm = 102.8406, GNorm = 0.2755, lr_0 = 8.2084e-04
Loss = 5.0855e-02, PNorm = 102.9116, GNorm = 0.2950, lr_0 = 8.2027e-04
Loss = 4.5973e-02, PNorm = 102.9842, GNorm = 0.3275, lr_0 = 8.1971e-04
Loss = 4.2941e-02, PNorm = 103.0495, GNorm = 0.3913, lr_0 = 8.1915e-04
Loss = 4.4883e-02, PNorm = 103.1052, GNorm = 0.5717, lr_0 = 8.1859e-04
Loss = 4.4489e-02, PNorm = 103.1744, GNorm = 0.6537, lr_0 = 8.1803e-04
Loss = 5.0870e-02, PNorm = 103.2409, GNorm = 0.4133, lr_0 = 8.1747e-04
Loss = 4.3461e-02, PNorm = 103.3061, GNorm = 0.4904, lr_0 = 8.1691e-04
Loss = 4.3904e-02, PNorm = 103.3674, GNorm = 0.2693, lr_0 = 8.1635e-04
Loss = 5.0888e-02, PNorm = 103.4302, GNorm = 0.2868, lr_0 = 8.1579e-04
Loss = 4.2271e-02, PNorm = 103.5029, GNorm = 0.3323, lr_0 = 8.1523e-04
Loss = 4.7825e-02, PNorm = 103.5666, GNorm = 0.2896, lr_0 = 8.1467e-04
Loss = 4.9671e-02, PNorm = 103.6325, GNorm = 0.3901, lr_0 = 8.1411e-04
Loss = 4.4701e-02, PNorm = 103.6926, GNorm = 0.3743, lr_0 = 8.1355e-04
Loss = 5.0293e-02, PNorm = 103.7580, GNorm = 0.5227, lr_0 = 8.1300e-04
Loss = 5.2644e-02, PNorm = 103.8233, GNorm = 0.3504, lr_0 = 8.1244e-04
Loss = 5.0214e-02, PNorm = 103.8954, GNorm = 0.3424, lr_0 = 8.1188e-04
Loss = 4.2020e-02, PNorm = 103.9646, GNorm = 0.3035, lr_0 = 8.1133e-04
Loss = 4.1989e-02, PNorm = 104.0295, GNorm = 0.5504, lr_0 = 8.1077e-04
Loss = 5.6960e-02, PNorm = 104.0972, GNorm = 0.3855, lr_0 = 8.1022e-04
Loss = 5.9198e-02, PNorm = 104.1751, GNorm = 0.6484, lr_0 = 8.0966e-04
Loss = 5.1101e-02, PNorm = 104.2621, GNorm = 0.7085, lr_0 = 8.0911e-04
Loss = 5.5497e-02, PNorm = 104.3436, GNorm = 0.5420, lr_0 = 8.0855e-04
Loss = 5.0894e-02, PNorm = 104.4275, GNorm = 0.3242, lr_0 = 8.0800e-04
Loss = 4.4762e-02, PNorm = 104.4966, GNorm = 0.3387, lr_0 = 8.0745e-04
Loss = 6.2073e-02, PNorm = 104.5695, GNorm = 0.8153, lr_0 = 8.0689e-04
Loss = 4.8357e-02, PNorm = 104.6368, GNorm = 0.2925, lr_0 = 8.0634e-04
Loss = 5.6004e-02, PNorm = 104.7072, GNorm = 0.6470, lr_0 = 8.0579e-04
Loss = 4.9294e-02, PNorm = 104.7837, GNorm = 0.6167, lr_0 = 8.0523e-04
Loss = 4.8638e-02, PNorm = 104.8637, GNorm = 0.4295, lr_0 = 8.0468e-04
Loss = 5.6007e-02, PNorm = 104.9361, GNorm = 0.6507, lr_0 = 8.0413e-04
Loss = 4.9214e-02, PNorm = 105.0172, GNorm = 0.3864, lr_0 = 8.0358e-04
Loss = 4.2918e-02, PNorm = 105.0884, GNorm = 0.7066, lr_0 = 8.0303e-04
Loss = 5.2256e-02, PNorm = 105.1611, GNorm = 0.5953, lr_0 = 8.0248e-04
Loss = 5.1886e-02, PNorm = 105.2406, GNorm = 0.4220, lr_0 = 8.0193e-04
Loss = 4.7979e-02, PNorm = 105.3154, GNorm = 1.0454, lr_0 = 8.0138e-04
Loss = 5.0921e-02, PNorm = 105.3858, GNorm = 0.5416, lr_0 = 8.0083e-04
Loss = 4.9681e-02, PNorm = 105.4531, GNorm = 0.3376, lr_0 = 8.0028e-04
Loss = 4.9512e-02, PNorm = 105.5206, GNorm = 0.6575, lr_0 = 7.9974e-04
Loss = 5.4251e-02, PNorm = 105.5933, GNorm = 0.3202, lr_0 = 7.9919e-04
Loss = 5.2245e-02, PNorm = 105.6699, GNorm = 0.6404, lr_0 = 7.9864e-04
Loss = 5.6444e-02, PNorm = 105.7414, GNorm = 0.4680, lr_0 = 7.9809e-04
Loss = 5.0771e-02, PNorm = 105.8170, GNorm = 0.4708, lr_0 = 7.9755e-04
Loss = 5.2099e-02, PNorm = 105.8940, GNorm = 0.9114, lr_0 = 7.9700e-04
Loss = 4.5543e-02, PNorm = 105.9722, GNorm = 0.3452, lr_0 = 7.9645e-04
Loss = 4.6219e-02, PNorm = 106.0424, GNorm = 0.6870, lr_0 = 7.9591e-04
Loss = 5.3388e-02, PNorm = 106.1123, GNorm = 0.8849, lr_0 = 7.9536e-04
Loss = 4.7607e-02, PNorm = 106.1822, GNorm = 0.2483, lr_0 = 7.9482e-04
Loss = 4.8257e-02, PNorm = 106.2515, GNorm = 0.5166, lr_0 = 7.9427e-04
Loss = 5.2067e-02, PNorm = 106.3147, GNorm = 0.2275, lr_0 = 7.9373e-04
Loss = 5.3165e-02, PNorm = 106.3831, GNorm = 0.4772, lr_0 = 7.9319e-04
Loss = 5.2352e-02, PNorm = 106.4588, GNorm = 0.4252, lr_0 = 7.9264e-04
Loss = 5.3810e-02, PNorm = 106.5303, GNorm = 0.5475, lr_0 = 7.9210e-04
Loss = 5.4239e-02, PNorm = 106.6149, GNorm = 0.4460, lr_0 = 7.9156e-04
Loss = 5.2339e-02, PNorm = 106.6955, GNorm = 0.5190, lr_0 = 7.9101e-04
Loss = 5.2839e-02, PNorm = 106.7795, GNorm = 0.3376, lr_0 = 7.9047e-04
Loss = 5.9822e-02, PNorm = 106.8660, GNorm = 0.4283, lr_0 = 7.8993e-04
Loss = 6.4526e-02, PNorm = 106.9449, GNorm = 0.4074, lr_0 = 7.8939e-04
Loss = 5.2323e-02, PNorm = 107.0242, GNorm = 0.8100, lr_0 = 7.8885e-04
Loss = 5.7099e-02, PNorm = 107.1059, GNorm = 0.3366, lr_0 = 7.8831e-04
Loss = 5.7231e-02, PNorm = 107.1839, GNorm = 0.5095, lr_0 = 7.8777e-04
Loss = 4.6732e-02, PNorm = 107.2632, GNorm = 0.3674, lr_0 = 7.8723e-04
Loss = 5.6036e-02, PNorm = 107.3318, GNorm = 0.5347, lr_0 = 7.8669e-04
Loss = 5.2629e-02, PNorm = 107.4079, GNorm = 0.6224, lr_0 = 7.8615e-04
Loss = 5.7421e-02, PNorm = 107.4937, GNorm = 1.4606, lr_0 = 7.8561e-04
Loss = 4.8393e-02, PNorm = 107.5824, GNorm = 0.2781, lr_0 = 7.8507e-04
Loss = 5.4033e-02, PNorm = 107.6674, GNorm = 0.6203, lr_0 = 7.8454e-04
Loss = 5.5124e-02, PNorm = 107.7454, GNorm = 0.5281, lr_0 = 7.8400e-04
Loss = 5.3019e-02, PNorm = 107.8217, GNorm = 0.7583, lr_0 = 7.8346e-04
Loss = 6.1541e-02, PNorm = 107.9081, GNorm = 1.1218, lr_0 = 7.8293e-04
Loss = 5.6717e-02, PNorm = 107.9933, GNorm = 0.6068, lr_0 = 7.8239e-04
Loss = 4.7859e-02, PNorm = 108.0764, GNorm = 0.3433, lr_0 = 7.8185e-04
Loss = 5.6962e-02, PNorm = 108.1486, GNorm = 0.5838, lr_0 = 7.8132e-04
Validation mae = 0.496673
Epoch 5
Loss = 4.6631e-02, PNorm = 108.2163, GNorm = 0.3332, lr_0 = 7.8078e-04
Loss = 4.0245e-02, PNorm = 108.2758, GNorm = 0.4137, lr_0 = 7.8025e-04
Loss = 3.7994e-02, PNorm = 108.3302, GNorm = 0.5610, lr_0 = 7.7971e-04
Loss = 3.6182e-02, PNorm = 108.3762, GNorm = 0.2862, lr_0 = 7.7918e-04
Loss = 3.7288e-02, PNorm = 108.4271, GNorm = 0.5061, lr_0 = 7.7864e-04
Loss = 3.6760e-02, PNorm = 108.4834, GNorm = 0.5494, lr_0 = 7.7811e-04
Loss = 3.3180e-02, PNorm = 108.5356, GNorm = 0.2290, lr_0 = 7.7758e-04
Loss = 3.2514e-02, PNorm = 108.5808, GNorm = 0.4710, lr_0 = 7.7705e-04
Loss = 2.9497e-02, PNorm = 108.6188, GNorm = 0.7402, lr_0 = 7.7651e-04
Loss = 3.3793e-02, PNorm = 108.6566, GNorm = 0.4580, lr_0 = 7.7598e-04
Loss = 4.2409e-02, PNorm = 108.7056, GNorm = 0.5289, lr_0 = 7.7545e-04
Loss = 3.9873e-02, PNorm = 108.7567, GNorm = 0.3645, lr_0 = 7.7492e-04
Loss = 3.7743e-02, PNorm = 108.8143, GNorm = 0.3374, lr_0 = 7.7439e-04
Loss = 2.8318e-02, PNorm = 108.8730, GNorm = 0.6058, lr_0 = 7.7386e-04
Loss = 3.6006e-02, PNorm = 108.9180, GNorm = 0.6473, lr_0 = 7.7333e-04
Loss = 3.4860e-02, PNorm = 108.9668, GNorm = 0.3867, lr_0 = 7.7280e-04
Loss = 3.3288e-02, PNorm = 109.0130, GNorm = 0.2839, lr_0 = 7.7227e-04
Loss = 4.0240e-02, PNorm = 109.0621, GNorm = 0.7435, lr_0 = 7.7174e-04
Loss = 2.9960e-02, PNorm = 109.1072, GNorm = 0.3895, lr_0 = 7.7121e-04
Loss = 3.5782e-02, PNorm = 109.1621, GNorm = 0.3307, lr_0 = 7.7068e-04
Loss = 2.8209e-02, PNorm = 109.2091, GNorm = 0.3387, lr_0 = 7.7015e-04
Loss = 3.5680e-02, PNorm = 109.2536, GNorm = 0.3018, lr_0 = 7.6963e-04
Loss = 3.5867e-02, PNorm = 109.3056, GNorm = 0.2523, lr_0 = 7.6910e-04
Loss = 3.9470e-02, PNorm = 109.3621, GNorm = 0.4379, lr_0 = 7.6857e-04
Loss = 3.0671e-02, PNorm = 109.4201, GNorm = 0.4742, lr_0 = 7.6805e-04
Loss = 3.1971e-02, PNorm = 109.4658, GNorm = 0.5842, lr_0 = 7.6752e-04
Loss = 3.5460e-02, PNorm = 109.5119, GNorm = 0.6509, lr_0 = 7.6699e-04
Loss = 3.3971e-02, PNorm = 109.5694, GNorm = 0.5899, lr_0 = 7.6647e-04
Loss = 2.8678e-02, PNorm = 109.6220, GNorm = 0.3134, lr_0 = 7.6594e-04
Loss = 3.0851e-02, PNorm = 109.6706, GNorm = 0.4382, lr_0 = 7.6542e-04
Loss = 3.3815e-02, PNorm = 109.7164, GNorm = 0.3540, lr_0 = 7.6489e-04
Loss = 3.2181e-02, PNorm = 109.7677, GNorm = 0.4725, lr_0 = 7.6437e-04
Loss = 3.8364e-02, PNorm = 109.8178, GNorm = 0.4302, lr_0 = 7.6385e-04
Loss = 3.9498e-02, PNorm = 109.8712, GNorm = 0.4106, lr_0 = 7.6332e-04
Loss = 3.7789e-02, PNorm = 109.9317, GNorm = 0.3057, lr_0 = 7.6280e-04
Loss = 3.0157e-02, PNorm = 109.9890, GNorm = 0.4282, lr_0 = 7.6228e-04
Loss = 3.0718e-02, PNorm = 110.0465, GNorm = 0.2143, lr_0 = 7.6176e-04
Loss = 3.9245e-02, PNorm = 110.0932, GNorm = 0.4896, lr_0 = 7.6123e-04
Loss = 4.2349e-02, PNorm = 110.1517, GNorm = 0.4054, lr_0 = 7.6071e-04
Loss = 3.2977e-02, PNorm = 110.2062, GNorm = 0.2406, lr_0 = 7.6019e-04
Loss = 3.4819e-02, PNorm = 110.2596, GNorm = 0.2832, lr_0 = 7.5967e-04
Loss = 3.7003e-02, PNorm = 110.3230, GNorm = 0.6928, lr_0 = 7.5915e-04
Loss = 2.8011e-02, PNorm = 110.3754, GNorm = 0.3065, lr_0 = 7.5863e-04
Loss = 3.1500e-02, PNorm = 110.4299, GNorm = 0.7153, lr_0 = 7.5811e-04
Loss = 3.8988e-02, PNorm = 110.4779, GNorm = 0.4605, lr_0 = 7.5759e-04
Loss = 3.7608e-02, PNorm = 110.5228, GNorm = 0.5972, lr_0 = 7.5707e-04
Loss = 3.3683e-02, PNorm = 110.5800, GNorm = 0.2272, lr_0 = 7.5655e-04
Loss = 3.3253e-02, PNorm = 110.6289, GNorm = 0.4553, lr_0 = 7.5603e-04
Loss = 3.7212e-02, PNorm = 110.6950, GNorm = 0.6500, lr_0 = 7.5552e-04
Loss = 3.7956e-02, PNorm = 110.7551, GNorm = 0.5620, lr_0 = 7.5500e-04
Loss = 3.3364e-02, PNorm = 110.8073, GNorm = 0.6218, lr_0 = 7.5448e-04
Loss = 3.5201e-02, PNorm = 110.8517, GNorm = 0.2392, lr_0 = 7.5397e-04
Loss = 3.6009e-02, PNorm = 110.9081, GNorm = 0.4881, lr_0 = 7.5345e-04
Loss = 3.9422e-02, PNorm = 110.9645, GNorm = 0.3489, lr_0 = 7.5293e-04
Loss = 3.7914e-02, PNorm = 111.0277, GNorm = 0.3702, lr_0 = 7.5242e-04
Loss = 3.0400e-02, PNorm = 111.0898, GNorm = 0.4010, lr_0 = 7.5190e-04
Loss = 3.6564e-02, PNorm = 111.1560, GNorm = 0.3995, lr_0 = 7.5139e-04
Loss = 3.4984e-02, PNorm = 111.2204, GNorm = 0.3756, lr_0 = 7.5087e-04
Loss = 4.2186e-02, PNorm = 111.2808, GNorm = 0.8536, lr_0 = 7.5036e-04
Loss = 3.1198e-02, PNorm = 111.3453, GNorm = 0.2742, lr_0 = 7.4984e-04
Loss = 3.6017e-02, PNorm = 111.4080, GNorm = 0.4265, lr_0 = 7.4933e-04
Loss = 4.2169e-02, PNorm = 111.4595, GNorm = 1.1017, lr_0 = 7.4882e-04
Loss = 3.1880e-02, PNorm = 111.5223, GNorm = 0.2441, lr_0 = 7.4830e-04
Loss = 3.3886e-02, PNorm = 111.5878, GNorm = 0.5078, lr_0 = 7.4779e-04
Loss = 3.3532e-02, PNorm = 111.6560, GNorm = 0.5251, lr_0 = 7.4728e-04
Loss = 4.3307e-02, PNorm = 111.7173, GNorm = 0.5305, lr_0 = 7.4677e-04
Loss = 3.6407e-02, PNorm = 111.7839, GNorm = 0.8577, lr_0 = 7.4625e-04
Loss = 4.7111e-02, PNorm = 111.8460, GNorm = 0.8184, lr_0 = 7.4574e-04
Loss = 4.3245e-02, PNorm = 111.9159, GNorm = 0.4472, lr_0 = 7.4523e-04
Loss = 3.4748e-02, PNorm = 111.9788, GNorm = 0.6124, lr_0 = 7.4472e-04
Loss = 3.2730e-02, PNorm = 112.0434, GNorm = 0.3039, lr_0 = 7.4421e-04
Loss = 3.5294e-02, PNorm = 112.1080, GNorm = 0.4978, lr_0 = 7.4370e-04
Loss = 3.5430e-02, PNorm = 112.1774, GNorm = 0.3731, lr_0 = 7.4319e-04
Loss = 3.5935e-02, PNorm = 112.2424, GNorm = 0.5802, lr_0 = 7.4268e-04
Loss = 4.0598e-02, PNorm = 112.3063, GNorm = 0.3726, lr_0 = 7.4217e-04
Loss = 3.4951e-02, PNorm = 112.3723, GNorm = 0.4015, lr_0 = 7.4167e-04
Loss = 3.7023e-02, PNorm = 112.4386, GNorm = 0.3908, lr_0 = 7.4116e-04
Loss = 3.4285e-02, PNorm = 112.5066, GNorm = 0.2615, lr_0 = 7.4065e-04
Loss = 3.2946e-02, PNorm = 112.5679, GNorm = 0.5577, lr_0 = 7.4014e-04
Loss = 4.1851e-02, PNorm = 112.6332, GNorm = 0.4317, lr_0 = 7.3964e-04
Loss = 4.4609e-02, PNorm = 112.7018, GNorm = 0.6168, lr_0 = 7.3913e-04
Loss = 3.8332e-02, PNorm = 112.7810, GNorm = 0.7151, lr_0 = 7.3862e-04
Loss = 4.4961e-02, PNorm = 112.8516, GNorm = 0.2529, lr_0 = 7.3812e-04
Loss = 3.9387e-02, PNorm = 112.9182, GNorm = 0.6955, lr_0 = 7.3761e-04
Loss = 4.1996e-02, PNorm = 112.9898, GNorm = 0.5530, lr_0 = 7.3711e-04
Loss = 3.8898e-02, PNorm = 113.0637, GNorm = 0.5617, lr_0 = 7.3660e-04
Loss = 3.9779e-02, PNorm = 113.1319, GNorm = 1.1476, lr_0 = 7.3610e-04
Loss = 3.7233e-02, PNorm = 113.1992, GNorm = 0.2782, lr_0 = 7.3559e-04
Loss = 3.5660e-02, PNorm = 113.2671, GNorm = 0.5078, lr_0 = 7.3509e-04
Loss = 4.1342e-02, PNorm = 113.3357, GNorm = 0.5376, lr_0 = 7.3458e-04
Loss = 3.9547e-02, PNorm = 113.4114, GNorm = 0.4960, lr_0 = 7.3408e-04
Loss = 4.2089e-02, PNorm = 113.4765, GNorm = 0.4663, lr_0 = 7.3358e-04
Loss = 3.5751e-02, PNorm = 113.5452, GNorm = 0.3302, lr_0 = 7.3308e-04
Loss = 4.9525e-02, PNorm = 113.6095, GNorm = 0.6972, lr_0 = 7.3257e-04
Loss = 3.7393e-02, PNorm = 113.6835, GNorm = 0.3441, lr_0 = 7.3207e-04
Loss = 3.7589e-02, PNorm = 113.7474, GNorm = 0.2614, lr_0 = 7.3157e-04
Loss = 3.6737e-02, PNorm = 113.8171, GNorm = 0.4384, lr_0 = 7.3107e-04
Loss = 3.5920e-02, PNorm = 113.8803, GNorm = 0.3523, lr_0 = 7.3057e-04
Loss = 3.6863e-02, PNorm = 113.9480, GNorm = 1.2111, lr_0 = 7.3007e-04
Loss = 3.0362e-02, PNorm = 114.0075, GNorm = 0.4556, lr_0 = 7.2957e-04
Loss = 4.4216e-02, PNorm = 114.0658, GNorm = 0.5980, lr_0 = 7.2907e-04
Loss = 3.8296e-02, PNorm = 114.1268, GNorm = 0.5490, lr_0 = 7.2857e-04
Loss = 3.9610e-02, PNorm = 114.1906, GNorm = 0.2152, lr_0 = 7.2807e-04
Loss = 4.3674e-02, PNorm = 114.2541, GNorm = 0.3833, lr_0 = 7.2757e-04
Loss = 3.5787e-02, PNorm = 114.3263, GNorm = 0.4634, lr_0 = 7.2707e-04
Loss = 4.3178e-02, PNorm = 114.3941, GNorm = 0.8524, lr_0 = 7.2657e-04
Loss = 4.1831e-02, PNorm = 114.4685, GNorm = 0.8603, lr_0 = 7.2608e-04
Loss = 3.9823e-02, PNorm = 114.5308, GNorm = 0.6470, lr_0 = 7.2558e-04
Loss = 3.8348e-02, PNorm = 114.5994, GNorm = 0.3062, lr_0 = 7.2508e-04
Loss = 3.8671e-02, PNorm = 114.6673, GNorm = 0.2881, lr_0 = 7.2458e-04
Loss = 4.2227e-02, PNorm = 114.7372, GNorm = 0.8517, lr_0 = 7.2409e-04
Loss = 3.9485e-02, PNorm = 114.8086, GNorm = 0.2792, lr_0 = 7.2359e-04
Loss = 3.8275e-02, PNorm = 114.8859, GNorm = 0.4707, lr_0 = 7.2310e-04
Loss = 4.2148e-02, PNorm = 114.9572, GNorm = 0.3829, lr_0 = 7.2260e-04
Loss = 4.2415e-02, PNorm = 115.0279, GNorm = 0.5416, lr_0 = 7.2211e-04
Loss = 3.9437e-02, PNorm = 115.1027, GNorm = 0.7108, lr_0 = 7.2161e-04
Loss = 4.2566e-02, PNorm = 115.1766, GNorm = 0.3851, lr_0 = 7.2112e-04
Loss = 4.5184e-02, PNorm = 115.2478, GNorm = 1.1597, lr_0 = 7.2062e-04
Loss = 4.2320e-02, PNorm = 115.3192, GNorm = 0.4130, lr_0 = 7.2013e-04
Loss = 3.7868e-02, PNorm = 115.3987, GNorm = 0.4027, lr_0 = 7.1964e-04
Validation mae = 0.493424
Epoch 6
Loss = 3.3372e-02, PNorm = 115.4657, GNorm = 0.5037, lr_0 = 7.1914e-04
Loss = 2.9213e-02, PNorm = 115.5230, GNorm = 0.2717, lr_0 = 7.1865e-04
Loss = 2.9323e-02, PNorm = 115.5626, GNorm = 0.5261, lr_0 = 7.1816e-04
Loss = 3.1002e-02, PNorm = 115.6122, GNorm = 0.5691, lr_0 = 7.1767e-04
Loss = 3.3694e-02, PNorm = 115.6534, GNorm = 0.3617, lr_0 = 7.1717e-04
Loss = 2.6097e-02, PNorm = 115.7002, GNorm = 0.3240, lr_0 = 7.1668e-04
Loss = 2.5028e-02, PNorm = 115.7389, GNorm = 0.2080, lr_0 = 7.1619e-04
Loss = 2.7825e-02, PNorm = 115.7817, GNorm = 0.3193, lr_0 = 7.1570e-04
Loss = 2.5974e-02, PNorm = 115.8232, GNorm = 0.4356, lr_0 = 7.1521e-04
Loss = 2.5086e-02, PNorm = 115.8658, GNorm = 0.4172, lr_0 = 7.1472e-04
Loss = 2.6070e-02, PNorm = 115.9085, GNorm = 0.5387, lr_0 = 7.1423e-04
Loss = 3.3716e-02, PNorm = 115.9572, GNorm = 0.5094, lr_0 = 7.1374e-04
Loss = 2.2914e-02, PNorm = 116.0079, GNorm = 0.3278, lr_0 = 7.1325e-04
Loss = 3.0357e-02, PNorm = 116.0451, GNorm = 0.5207, lr_0 = 7.1277e-04
Loss = 2.3897e-02, PNorm = 116.0841, GNorm = 0.2805, lr_0 = 7.1228e-04
Loss = 2.6534e-02, PNorm = 116.1260, GNorm = 0.1552, lr_0 = 7.1179e-04
Loss = 2.4630e-02, PNorm = 116.1656, GNorm = 0.4050, lr_0 = 7.1130e-04
Loss = 2.7471e-02, PNorm = 116.2058, GNorm = 0.4298, lr_0 = 7.1081e-04
Loss = 2.3840e-02, PNorm = 116.2472, GNorm = 0.2265, lr_0 = 7.1033e-04
Loss = 2.9758e-02, PNorm = 116.2905, GNorm = 0.2911, lr_0 = 7.0984e-04
Loss = 2.6168e-02, PNorm = 116.3345, GNorm = 0.3205, lr_0 = 7.0935e-04
Loss = 2.7125e-02, PNorm = 116.3694, GNorm = 0.6481, lr_0 = 7.0887e-04
Loss = 2.7074e-02, PNorm = 116.4225, GNorm = 0.6994, lr_0 = 7.0838e-04
Loss = 2.9080e-02, PNorm = 116.4672, GNorm = 0.6906, lr_0 = 7.0790e-04
Loss = 2.7509e-02, PNorm = 116.5251, GNorm = 0.2864, lr_0 = 7.0741e-04
Loss = 3.2592e-02, PNorm = 116.5727, GNorm = 0.3890, lr_0 = 7.0693e-04
Loss = 2.4041e-02, PNorm = 116.6330, GNorm = 0.3829, lr_0 = 7.0644e-04
Loss = 3.0460e-02, PNorm = 116.6812, GNorm = 0.7113, lr_0 = 7.0596e-04
Loss = 2.9161e-02, PNorm = 116.7264, GNorm = 0.3558, lr_0 = 7.0548e-04
Loss = 2.6602e-02, PNorm = 116.7752, GNorm = 0.6772, lr_0 = 7.0499e-04
Loss = 2.9502e-02, PNorm = 116.8157, GNorm = 0.5981, lr_0 = 7.0451e-04
Loss = 2.9945e-02, PNorm = 116.8665, GNorm = 0.5658, lr_0 = 7.0403e-04
Loss = 2.7960e-02, PNorm = 116.9219, GNorm = 0.3169, lr_0 = 7.0354e-04
Loss = 2.5721e-02, PNorm = 116.9630, GNorm = 0.5590, lr_0 = 7.0306e-04
Loss = 2.9055e-02, PNorm = 117.0143, GNorm = 0.6797, lr_0 = 7.0258e-04
Loss = 2.1985e-02, PNorm = 117.0592, GNorm = 0.7366, lr_0 = 7.0210e-04
Loss = 2.7294e-02, PNorm = 117.0993, GNorm = 0.3343, lr_0 = 7.0162e-04
Loss = 2.9138e-02, PNorm = 117.1459, GNorm = 0.3733, lr_0 = 7.0114e-04
Loss = 2.9016e-02, PNorm = 117.1892, GNorm = 0.3387, lr_0 = 7.0066e-04
Loss = 2.6451e-02, PNorm = 117.2338, GNorm = 0.2586, lr_0 = 7.0018e-04
Loss = 2.8657e-02, PNorm = 117.2754, GNorm = 0.2623, lr_0 = 6.9970e-04
Loss = 2.7488e-02, PNorm = 117.3229, GNorm = 0.3078, lr_0 = 6.9922e-04
Loss = 2.9640e-02, PNorm = 117.3644, GNorm = 0.6800, lr_0 = 6.9874e-04
Loss = 3.6364e-02, PNorm = 117.4147, GNorm = 0.6861, lr_0 = 6.9826e-04
Loss = 2.4735e-02, PNorm = 117.4661, GNorm = 0.4941, lr_0 = 6.9778e-04
Loss = 2.4313e-02, PNorm = 117.5197, GNorm = 0.4768, lr_0 = 6.9730e-04
Loss = 2.8255e-02, PNorm = 117.5689, GNorm = 0.8738, lr_0 = 6.9683e-04
Loss = 2.7514e-02, PNorm = 117.6165, GNorm = 0.4053, lr_0 = 6.9635e-04
Loss = 2.9710e-02, PNorm = 117.6564, GNorm = 0.4277, lr_0 = 6.9587e-04
Loss = 3.0096e-02, PNorm = 117.7071, GNorm = 0.4255, lr_0 = 6.9540e-04
Loss = 3.1129e-02, PNorm = 117.7646, GNorm = 0.5022, lr_0 = 6.9492e-04
Loss = 2.8606e-02, PNorm = 117.8241, GNorm = 0.4009, lr_0 = 6.9444e-04
Loss = 2.9568e-02, PNorm = 117.8884, GNorm = 0.9652, lr_0 = 6.9397e-04
Loss = 2.8421e-02, PNorm = 117.9420, GNorm = 0.4653, lr_0 = 6.9349e-04
Loss = 2.5052e-02, PNorm = 117.9939, GNorm = 0.4595, lr_0 = 6.9302e-04
Loss = 2.6387e-02, PNorm = 118.0363, GNorm = 0.2220, lr_0 = 6.9254e-04
Loss = 2.5716e-02, PNorm = 118.0802, GNorm = 0.4097, lr_0 = 6.9207e-04
Loss = 2.6786e-02, PNorm = 118.1315, GNorm = 0.5286, lr_0 = 6.9159e-04
Loss = 2.5967e-02, PNorm = 118.1836, GNorm = 0.3264, lr_0 = 6.9112e-04
Loss = 2.6089e-02, PNorm = 118.2332, GNorm = 0.2635, lr_0 = 6.9065e-04
Loss = 3.1221e-02, PNorm = 118.2791, GNorm = 0.5570, lr_0 = 6.9017e-04
Loss = 2.7058e-02, PNorm = 118.3248, GNorm = 0.2983, lr_0 = 6.8970e-04
Loss = 2.4191e-02, PNorm = 118.3706, GNorm = 0.4089, lr_0 = 6.8923e-04
Loss = 2.6531e-02, PNorm = 118.4217, GNorm = 0.3935, lr_0 = 6.8876e-04
Loss = 3.2756e-02, PNorm = 118.4754, GNorm = 0.9469, lr_0 = 6.8828e-04
Loss = 3.0856e-02, PNorm = 118.5278, GNorm = 0.2651, lr_0 = 6.8781e-04
Loss = 2.7658e-02, PNorm = 118.5809, GNorm = 0.5275, lr_0 = 6.8734e-04
Loss = 2.9922e-02, PNorm = 118.6349, GNorm = 0.3456, lr_0 = 6.8687e-04
Loss = 2.9396e-02, PNorm = 118.6938, GNorm = 0.2841, lr_0 = 6.8640e-04
Loss = 2.9614e-02, PNorm = 118.7523, GNorm = 0.3675, lr_0 = 6.8593e-04
Loss = 3.0263e-02, PNorm = 118.8052, GNorm = 0.5902, lr_0 = 6.8546e-04
Loss = 2.7707e-02, PNorm = 118.8662, GNorm = 0.6983, lr_0 = 6.8499e-04
Loss = 2.9415e-02, PNorm = 118.9171, GNorm = 0.3108, lr_0 = 6.8452e-04
Loss = 2.6666e-02, PNorm = 118.9681, GNorm = 0.2614, lr_0 = 6.8405e-04
Loss = 2.7909e-02, PNorm = 119.0237, GNorm = 0.4450, lr_0 = 6.8358e-04
Loss = 2.9929e-02, PNorm = 119.0800, GNorm = 0.1968, lr_0 = 6.8312e-04
Loss = 2.6231e-02, PNorm = 119.1305, GNorm = 0.3125, lr_0 = 6.8265e-04
Loss = 2.5488e-02, PNorm = 119.1784, GNorm = 0.2541, lr_0 = 6.8218e-04
Loss = 3.1585e-02, PNorm = 119.2269, GNorm = 0.8293, lr_0 = 6.8171e-04
Loss = 2.8831e-02, PNorm = 119.2746, GNorm = 0.4428, lr_0 = 6.8125e-04
Loss = 2.9919e-02, PNorm = 119.3341, GNorm = 0.3279, lr_0 = 6.8078e-04
Loss = 2.6786e-02, PNorm = 119.3896, GNorm = 0.6407, lr_0 = 6.8031e-04
Loss = 2.8024e-02, PNorm = 119.4468, GNorm = 0.5465, lr_0 = 6.7985e-04
Loss = 2.8872e-02, PNorm = 119.5052, GNorm = 0.3403, lr_0 = 6.7938e-04
Loss = 3.0512e-02, PNorm = 119.5607, GNorm = 0.4078, lr_0 = 6.7892e-04
Loss = 2.7100e-02, PNorm = 119.6194, GNorm = 0.4619, lr_0 = 6.7845e-04
Loss = 2.7208e-02, PNorm = 119.6733, GNorm = 0.7094, lr_0 = 6.7799e-04
Loss = 2.9411e-02, PNorm = 119.7311, GNorm = 0.4295, lr_0 = 6.7752e-04
Loss = 3.4665e-02, PNorm = 119.7893, GNorm = 0.4368, lr_0 = 6.7706e-04
Loss = 3.4951e-02, PNorm = 119.8510, GNorm = 1.0363, lr_0 = 6.7659e-04
Loss = 3.1063e-02, PNorm = 119.9261, GNorm = 0.6588, lr_0 = 6.7613e-04
Loss = 3.1473e-02, PNorm = 119.9837, GNorm = 0.3497, lr_0 = 6.7567e-04
Loss = 2.9455e-02, PNorm = 120.0436, GNorm = 0.2615, lr_0 = 6.7520e-04
Loss = 2.7154e-02, PNorm = 120.0999, GNorm = 0.3253, lr_0 = 6.7474e-04
Loss = 3.1858e-02, PNorm = 120.1585, GNorm = 0.3666, lr_0 = 6.7428e-04
Loss = 3.2546e-02, PNorm = 120.2175, GNorm = 0.2532, lr_0 = 6.7382e-04
Loss = 2.7552e-02, PNorm = 120.2691, GNorm = 0.4428, lr_0 = 6.7335e-04
Loss = 3.4647e-02, PNorm = 120.3253, GNorm = 0.4625, lr_0 = 6.7289e-04
Loss = 2.9342e-02, PNorm = 120.3922, GNorm = 0.2327, lr_0 = 6.7243e-04
Loss = 2.8631e-02, PNorm = 120.4507, GNorm = 0.2923, lr_0 = 6.7197e-04
Loss = 3.0072e-02, PNorm = 120.5062, GNorm = 0.6963, lr_0 = 6.7151e-04
Loss = 3.2688e-02, PNorm = 120.5649, GNorm = 0.3013, lr_0 = 6.7105e-04
Loss = 2.7803e-02, PNorm = 120.6256, GNorm = 0.2436, lr_0 = 6.7059e-04
Loss = 3.4322e-02, PNorm = 120.6941, GNorm = 0.2933, lr_0 = 6.7013e-04
Loss = 3.2773e-02, PNorm = 120.7600, GNorm = 0.3199, lr_0 = 6.6967e-04
Loss = 3.2638e-02, PNorm = 120.8240, GNorm = 0.2906, lr_0 = 6.6921e-04
Loss = 2.9011e-02, PNorm = 120.8868, GNorm = 0.4136, lr_0 = 6.6876e-04
Loss = 2.8964e-02, PNorm = 120.9476, GNorm = 0.4788, lr_0 = 6.6830e-04
Loss = 3.3664e-02, PNorm = 121.0074, GNorm = 0.2068, lr_0 = 6.6784e-04
Loss = 3.0238e-02, PNorm = 121.0688, GNorm = 0.7563, lr_0 = 6.6738e-04
Loss = 2.8722e-02, PNorm = 121.1235, GNorm = 0.5698, lr_0 = 6.6693e-04
Loss = 3.3262e-02, PNorm = 121.1774, GNorm = 0.3696, lr_0 = 6.6647e-04
Loss = 3.4620e-02, PNorm = 121.2360, GNorm = 0.3621, lr_0 = 6.6601e-04
Loss = 2.8773e-02, PNorm = 121.3048, GNorm = 0.3562, lr_0 = 6.6556e-04
Loss = 2.9434e-02, PNorm = 121.3655, GNorm = 0.6435, lr_0 = 6.6510e-04
Loss = 3.3855e-02, PNorm = 121.4251, GNorm = 0.3168, lr_0 = 6.6464e-04
Loss = 3.3003e-02, PNorm = 121.4886, GNorm = 0.7898, lr_0 = 6.6419e-04
Loss = 2.8735e-02, PNorm = 121.5445, GNorm = 0.7617, lr_0 = 6.6373e-04
Loss = 3.4331e-02, PNorm = 121.6091, GNorm = 0.4142, lr_0 = 6.6328e-04
Loss = 3.1855e-02, PNorm = 121.6751, GNorm = 0.7301, lr_0 = 6.6282e-04
Validation mae = 0.490257
Epoch 7
Loss = 2.4841e-02, PNorm = 121.7295, GNorm = 0.2865, lr_0 = 6.6237e-04
Loss = 2.6853e-02, PNorm = 121.7722, GNorm = 0.5938, lr_0 = 6.6192e-04
Loss = 2.3851e-02, PNorm = 121.8200, GNorm = 0.2980, lr_0 = 6.6146e-04
Loss = 2.6588e-02, PNorm = 121.8665, GNorm = 0.2992, lr_0 = 6.6101e-04
Loss = 2.4145e-02, PNorm = 121.9032, GNorm = 0.5528, lr_0 = 6.6056e-04
Loss = 2.5417e-02, PNorm = 121.9436, GNorm = 0.3725, lr_0 = 6.6011e-04
Loss = 2.7535e-02, PNorm = 121.9907, GNorm = 0.3948, lr_0 = 6.5965e-04
Loss = 2.6867e-02, PNorm = 122.0405, GNorm = 0.4832, lr_0 = 6.5920e-04
Loss = 2.7175e-02, PNorm = 122.0799, GNorm = 0.6731, lr_0 = 6.5875e-04
Loss = 2.1043e-02, PNorm = 122.1288, GNorm = 0.3334, lr_0 = 6.5830e-04
Loss = 2.0112e-02, PNorm = 122.1683, GNorm = 0.6567, lr_0 = 6.5785e-04
Loss = 2.3454e-02, PNorm = 122.2062, GNorm = 0.4187, lr_0 = 6.5740e-04
Loss = 2.2305e-02, PNorm = 122.2467, GNorm = 0.2097, lr_0 = 6.5695e-04
Loss = 2.3240e-02, PNorm = 122.2851, GNorm = 0.2069, lr_0 = 6.5650e-04
Loss = 1.9989e-02, PNorm = 122.3226, GNorm = 0.2216, lr_0 = 6.5605e-04
Loss = 2.3507e-02, PNorm = 122.3530, GNorm = 0.2127, lr_0 = 6.5560e-04
Loss = 2.3248e-02, PNorm = 122.3932, GNorm = 0.3970, lr_0 = 6.5515e-04
Loss = 2.4991e-02, PNorm = 122.4396, GNorm = 0.1624, lr_0 = 6.5470e-04
Loss = 2.2139e-02, PNorm = 122.4862, GNorm = 0.1454, lr_0 = 6.5425e-04
Loss = 2.6200e-02, PNorm = 122.5291, GNorm = 0.2327, lr_0 = 6.5380e-04
Loss = 2.3306e-02, PNorm = 122.5702, GNorm = 0.5595, lr_0 = 6.5335e-04
Loss = 1.9601e-02, PNorm = 122.6077, GNorm = 0.2205, lr_0 = 6.5291e-04
Loss = 2.3301e-02, PNorm = 122.6507, GNorm = 0.3004, lr_0 = 6.5246e-04
Loss = 2.3981e-02, PNorm = 122.6934, GNorm = 0.6535, lr_0 = 6.5201e-04
Loss = 2.2614e-02, PNorm = 122.7393, GNorm = 0.6809, lr_0 = 6.5157e-04
Loss = 2.3750e-02, PNorm = 122.7762, GNorm = 0.4698, lr_0 = 6.5112e-04
Loss = 2.1293e-02, PNorm = 122.8185, GNorm = 0.1879, lr_0 = 6.5067e-04
Loss = 1.8308e-02, PNorm = 122.8621, GNorm = 0.1567, lr_0 = 6.5023e-04
Loss = 2.2916e-02, PNorm = 122.8968, GNorm = 0.5107, lr_0 = 6.4978e-04
Loss = 2.1685e-02, PNorm = 122.9387, GNorm = 0.3355, lr_0 = 6.4934e-04
Loss = 2.2608e-02, PNorm = 122.9828, GNorm = 0.4459, lr_0 = 6.4889e-04
Loss = 2.1054e-02, PNorm = 123.0273, GNorm = 0.2092, lr_0 = 6.4845e-04
Loss = 2.3800e-02, PNorm = 123.0701, GNorm = 0.3150, lr_0 = 6.4800e-04
Loss = 1.8978e-02, PNorm = 123.1133, GNorm = 0.1958, lr_0 = 6.4756e-04
Loss = 1.6623e-02, PNorm = 123.1536, GNorm = 0.1918, lr_0 = 6.4712e-04
Loss = 2.0949e-02, PNorm = 123.1943, GNorm = 0.2827, lr_0 = 6.4667e-04
Loss = 2.3995e-02, PNorm = 123.2299, GNorm = 0.8642, lr_0 = 6.4623e-04
Loss = 2.0098e-02, PNorm = 123.2686, GNorm = 0.2463, lr_0 = 6.4579e-04
Loss = 2.3404e-02, PNorm = 123.3110, GNorm = 0.2094, lr_0 = 6.4534e-04
Loss = 2.1488e-02, PNorm = 123.3608, GNorm = 0.5473, lr_0 = 6.4490e-04
Loss = 2.2137e-02, PNorm = 123.4072, GNorm = 0.3138, lr_0 = 6.4446e-04
Loss = 2.6034e-02, PNorm = 123.4467, GNorm = 0.4601, lr_0 = 6.4402e-04
Loss = 2.1619e-02, PNorm = 123.4965, GNorm = 0.2493, lr_0 = 6.4358e-04
Loss = 2.0448e-02, PNorm = 123.5431, GNorm = 0.2161, lr_0 = 6.4314e-04
Loss = 2.1504e-02, PNorm = 123.5855, GNorm = 0.3084, lr_0 = 6.4270e-04
Loss = 2.2490e-02, PNorm = 123.6261, GNorm = 0.5998, lr_0 = 6.4226e-04
Loss = 2.4219e-02, PNorm = 123.6778, GNorm = 0.6374, lr_0 = 6.4182e-04
Loss = 2.2246e-02, PNorm = 123.7185, GNorm = 0.3793, lr_0 = 6.4138e-04
Loss = 2.1200e-02, PNorm = 123.7610, GNorm = 0.1853, lr_0 = 6.4094e-04
Loss = 2.1722e-02, PNorm = 123.7933, GNorm = 0.5500, lr_0 = 6.4050e-04
Loss = 2.0960e-02, PNorm = 123.8349, GNorm = 0.3344, lr_0 = 6.4006e-04
Loss = 2.0357e-02, PNorm = 123.8729, GNorm = 0.4445, lr_0 = 6.3962e-04
Loss = 2.4297e-02, PNorm = 123.9135, GNorm = 0.2106, lr_0 = 6.3918e-04
Loss = 2.1315e-02, PNorm = 123.9633, GNorm = 0.5350, lr_0 = 6.3874e-04
Loss = 2.5824e-02, PNorm = 124.0114, GNorm = 0.4424, lr_0 = 6.3831e-04
Loss = 1.8501e-02, PNorm = 124.0545, GNorm = 0.2363, lr_0 = 6.3787e-04
Loss = 2.0841e-02, PNorm = 124.0989, GNorm = 0.1778, lr_0 = 6.3743e-04
Loss = 2.1746e-02, PNorm = 124.1407, GNorm = 0.2141, lr_0 = 6.3700e-04
Loss = 2.3050e-02, PNorm = 124.1874, GNorm = 0.9258, lr_0 = 6.3656e-04
Loss = 1.8549e-02, PNorm = 124.2313, GNorm = 0.3360, lr_0 = 6.3612e-04
Loss = 1.9650e-02, PNorm = 124.2834, GNorm = 0.3140, lr_0 = 6.3569e-04
Loss = 2.3660e-02, PNorm = 124.3206, GNorm = 0.3735, lr_0 = 6.3525e-04
Loss = 1.8873e-02, PNorm = 124.3636, GNorm = 0.4373, lr_0 = 6.3482e-04
Loss = 2.2535e-02, PNorm = 124.4098, GNorm = 0.6301, lr_0 = 6.3438e-04
Loss = 2.5809e-02, PNorm = 124.4558, GNorm = 0.8107, lr_0 = 6.3395e-04
Loss = 2.8162e-02, PNorm = 124.5046, GNorm = 0.8403, lr_0 = 6.3351e-04
Loss = 2.5520e-02, PNorm = 124.5584, GNorm = 0.6513, lr_0 = 6.3308e-04
Loss = 2.0749e-02, PNorm = 124.6165, GNorm = 0.4064, lr_0 = 6.3265e-04
Loss = 2.2200e-02, PNorm = 124.6686, GNorm = 0.4849, lr_0 = 6.3221e-04
Loss = 2.2666e-02, PNorm = 124.7149, GNorm = 0.5580, lr_0 = 6.3178e-04
Loss = 2.1753e-02, PNorm = 124.7659, GNorm = 0.5853, lr_0 = 6.3135e-04
Loss = 2.0237e-02, PNorm = 124.8149, GNorm = 0.1829, lr_0 = 6.3091e-04
Loss = 2.1932e-02, PNorm = 124.8656, GNorm = 0.6518, lr_0 = 6.3048e-04
Loss = 2.3067e-02, PNorm = 124.9167, GNorm = 0.1626, lr_0 = 6.3005e-04
Loss = 2.0027e-02, PNorm = 124.9635, GNorm = 0.2946, lr_0 = 6.2962e-04
Loss = 2.3249e-02, PNorm = 125.0146, GNorm = 0.2100, lr_0 = 6.2919e-04
Loss = 1.8244e-02, PNorm = 125.0587, GNorm = 0.2063, lr_0 = 6.2876e-04
Loss = 2.0113e-02, PNorm = 125.1017, GNorm = 0.4253, lr_0 = 6.2833e-04
Loss = 2.2107e-02, PNorm = 125.1450, GNorm = 0.3958, lr_0 = 6.2789e-04
Loss = 2.3448e-02, PNorm = 125.1912, GNorm = 0.1421, lr_0 = 6.2746e-04
Loss = 2.1806e-02, PNorm = 125.2408, GNorm = 0.3210, lr_0 = 6.2703e-04
Loss = 2.6613e-02, PNorm = 125.2907, GNorm = 0.3677, lr_0 = 6.2661e-04
Loss = 2.3613e-02, PNorm = 125.3369, GNorm = 0.2964, lr_0 = 6.2618e-04
Loss = 2.5403e-02, PNorm = 125.3837, GNorm = 0.4363, lr_0 = 6.2575e-04
Loss = 2.6898e-02, PNorm = 125.4390, GNorm = 0.7639, lr_0 = 6.2532e-04
Loss = 2.4388e-02, PNorm = 125.4940, GNorm = 0.1979, lr_0 = 6.2489e-04
Loss = 2.7191e-02, PNorm = 125.5418, GNorm = 0.2403, lr_0 = 6.2446e-04
Loss = 2.4165e-02, PNorm = 125.5900, GNorm = 0.1663, lr_0 = 6.2403e-04
Loss = 2.3587e-02, PNorm = 125.6414, GNorm = 0.4903, lr_0 = 6.2361e-04
Loss = 2.8055e-02, PNorm = 125.6901, GNorm = 0.4280, lr_0 = 6.2318e-04
Loss = 2.6354e-02, PNorm = 125.7423, GNorm = 0.4184, lr_0 = 6.2275e-04
Loss = 2.2795e-02, PNorm = 125.7914, GNorm = 0.1640, lr_0 = 6.2233e-04
Loss = 2.4753e-02, PNorm = 125.8389, GNorm = 0.2845, lr_0 = 6.2190e-04
Loss = 2.4998e-02, PNorm = 125.8968, GNorm = 0.3890, lr_0 = 6.2147e-04
Loss = 2.9266e-02, PNorm = 125.9539, GNorm = 0.3414, lr_0 = 6.2105e-04
Loss = 2.4023e-02, PNorm = 126.0066, GNorm = 0.3487, lr_0 = 6.2062e-04
Loss = 2.2617e-02, PNorm = 126.0575, GNorm = 0.4947, lr_0 = 6.2020e-04
Loss = 2.7783e-02, PNorm = 126.1034, GNorm = 0.4853, lr_0 = 6.1977e-04
Loss = 2.5361e-02, PNorm = 126.1473, GNorm = 0.2548, lr_0 = 6.1935e-04
Loss = 2.4313e-02, PNorm = 126.1969, GNorm = 0.2221, lr_0 = 6.1892e-04
Loss = 2.4349e-02, PNorm = 126.2500, GNorm = 0.2996, lr_0 = 6.1850e-04
Loss = 2.0561e-02, PNorm = 126.3011, GNorm = 0.4119, lr_0 = 6.1808e-04
Loss = 3.4312e-02, PNorm = 126.3541, GNorm = 0.2305, lr_0 = 6.1765e-04
Loss = 2.7537e-02, PNorm = 126.4110, GNorm = 0.7659, lr_0 = 6.1723e-04
Loss = 2.3706e-02, PNorm = 126.4658, GNorm = 0.3457, lr_0 = 6.1681e-04
Loss = 2.5597e-02, PNorm = 126.5207, GNorm = 0.6254, lr_0 = 6.1638e-04
Loss = 2.1597e-02, PNorm = 126.5719, GNorm = 0.3737, lr_0 = 6.1596e-04
Loss = 2.2108e-02, PNorm = 126.6284, GNorm = 0.4940, lr_0 = 6.1554e-04
Loss = 2.6814e-02, PNorm = 126.6745, GNorm = 0.8330, lr_0 = 6.1512e-04
Loss = 2.7187e-02, PNorm = 126.7227, GNorm = 0.3910, lr_0 = 6.1470e-04
Loss = 2.7849e-02, PNorm = 126.7742, GNorm = 0.3170, lr_0 = 6.1428e-04
Loss = 2.6023e-02, PNorm = 126.8281, GNorm = 0.1871, lr_0 = 6.1385e-04
Loss = 2.7843e-02, PNorm = 126.8808, GNorm = 0.2671, lr_0 = 6.1343e-04
Loss = 2.1125e-02, PNorm = 126.9317, GNorm = 0.1850, lr_0 = 6.1301e-04
Loss = 2.1431e-02, PNorm = 126.9815, GNorm = 0.2897, lr_0 = 6.1259e-04
Loss = 2.4699e-02, PNorm = 127.0272, GNorm = 0.3924, lr_0 = 6.1217e-04
Loss = 2.6399e-02, PNorm = 127.0761, GNorm = 0.4197, lr_0 = 6.1175e-04
Loss = 2.2968e-02, PNorm = 127.1238, GNorm = 0.2287, lr_0 = 6.1134e-04
Loss = 2.5810e-02, PNorm = 127.1707, GNorm = 0.1645, lr_0 = 6.1092e-04
Loss = 2.5490e-02, PNorm = 127.2189, GNorm = 0.3726, lr_0 = 6.1050e-04
Validation mae = 0.485617
Epoch 8
Loss = 2.1322e-02, PNorm = 127.2613, GNorm = 0.3073, lr_0 = 6.1008e-04
Loss = 2.0283e-02, PNorm = 127.3000, GNorm = 0.1444, lr_0 = 6.0966e-04
Loss = 1.9209e-02, PNorm = 127.3348, GNorm = 0.6060, lr_0 = 6.0924e-04
Loss = 2.2295e-02, PNorm = 127.3735, GNorm = 0.4083, lr_0 = 6.0883e-04
Loss = 1.6655e-02, PNorm = 127.4098, GNorm = 0.2811, lr_0 = 6.0841e-04
Loss = 2.2257e-02, PNorm = 127.4430, GNorm = 0.2949, lr_0 = 6.0799e-04
Loss = 1.9668e-02, PNorm = 127.4772, GNorm = 0.4227, lr_0 = 6.0758e-04
Loss = 1.8419e-02, PNorm = 127.5078, GNorm = 0.2266, lr_0 = 6.0716e-04
Loss = 2.2297e-02, PNorm = 127.5406, GNorm = 0.2647, lr_0 = 6.0674e-04
Loss = 1.9720e-02, PNorm = 127.5697, GNorm = 0.2328, lr_0 = 6.0633e-04
Loss = 1.9152e-02, PNorm = 127.6025, GNorm = 0.3720, lr_0 = 6.0591e-04
Loss = 1.7218e-02, PNorm = 127.6341, GNorm = 0.2409, lr_0 = 6.0550e-04
Loss = 2.0796e-02, PNorm = 127.6629, GNorm = 0.2177, lr_0 = 6.0508e-04
Loss = 1.7697e-02, PNorm = 127.6963, GNorm = 0.1969, lr_0 = 6.0467e-04
Loss = 2.2167e-02, PNorm = 127.7258, GNorm = 0.2635, lr_0 = 6.0425e-04
Loss = 1.7282e-02, PNorm = 127.7651, GNorm = 0.1543, lr_0 = 6.0384e-04
Loss = 1.5066e-02, PNorm = 127.7994, GNorm = 0.3248, lr_0 = 6.0343e-04
Loss = 1.8251e-02, PNorm = 127.8343, GNorm = 0.3932, lr_0 = 6.0301e-04
Loss = 2.0115e-02, PNorm = 127.8750, GNorm = 0.1891, lr_0 = 6.0260e-04
Loss = 2.0006e-02, PNorm = 127.9114, GNorm = 0.6464, lr_0 = 6.0219e-04
Loss = 2.2716e-02, PNorm = 127.9466, GNorm = 0.5233, lr_0 = 6.0178e-04
Loss = 1.8594e-02, PNorm = 127.9876, GNorm = 0.1888, lr_0 = 6.0136e-04
Loss = 2.0620e-02, PNorm = 128.0245, GNorm = 0.8566, lr_0 = 6.0095e-04
Loss = 1.5409e-02, PNorm = 128.0558, GNorm = 0.4404, lr_0 = 6.0054e-04
Loss = 2.2208e-02, PNorm = 128.0839, GNorm = 0.4989, lr_0 = 6.0013e-04
Loss = 1.8980e-02, PNorm = 128.1197, GNorm = 0.5020, lr_0 = 5.9972e-04
Loss = 2.0059e-02, PNorm = 128.1593, GNorm = 0.4205, lr_0 = 5.9931e-04
Loss = 1.5879e-02, PNorm = 128.1937, GNorm = 0.2027, lr_0 = 5.9890e-04
Loss = 1.8869e-02, PNorm = 128.2277, GNorm = 0.5542, lr_0 = 5.9849e-04
Loss = 1.6749e-02, PNorm = 128.2619, GNorm = 0.4513, lr_0 = 5.9808e-04
Loss = 1.8797e-02, PNorm = 128.3001, GNorm = 0.3674, lr_0 = 5.9767e-04
Loss = 1.5182e-02, PNorm = 128.3397, GNorm = 0.1263, lr_0 = 5.9726e-04
Loss = 1.4576e-02, PNorm = 128.3778, GNorm = 0.1742, lr_0 = 5.9685e-04
Loss = 1.7489e-02, PNorm = 128.4129, GNorm = 0.1750, lr_0 = 5.9644e-04
Loss = 1.5481e-02, PNorm = 128.4473, GNorm = 0.4299, lr_0 = 5.9603e-04
Loss = 1.7070e-02, PNorm = 128.4820, GNorm = 0.2740, lr_0 = 5.9562e-04
Loss = 1.6895e-02, PNorm = 128.5160, GNorm = 0.2815, lr_0 = 5.9521e-04
Loss = 1.8817e-02, PNorm = 128.5573, GNorm = 0.3204, lr_0 = 5.9481e-04
Loss = 1.6070e-02, PNorm = 128.5947, GNorm = 0.1795, lr_0 = 5.9440e-04
Loss = 1.8312e-02, PNorm = 128.6291, GNorm = 0.5575, lr_0 = 5.9399e-04
Loss = 2.1840e-02, PNorm = 128.6708, GNorm = 0.5723, lr_0 = 5.9358e-04
Loss = 1.8095e-02, PNorm = 128.7155, GNorm = 0.7408, lr_0 = 5.9318e-04
Loss = 1.7934e-02, PNorm = 128.7547, GNorm = 0.2233, lr_0 = 5.9277e-04
Loss = 1.7234e-02, PNorm = 128.7884, GNorm = 0.3325, lr_0 = 5.9236e-04
Loss = 1.8150e-02, PNorm = 128.8290, GNorm = 0.2374, lr_0 = 5.9196e-04
Loss = 1.8103e-02, PNorm = 128.8707, GNorm = 0.5845, lr_0 = 5.9155e-04
Loss = 1.5916e-02, PNorm = 128.9033, GNorm = 0.1510, lr_0 = 5.9115e-04
Loss = 1.5807e-02, PNorm = 128.9365, GNorm = 0.2137, lr_0 = 5.9074e-04
Loss = 1.7162e-02, PNorm = 128.9736, GNorm = 0.3649, lr_0 = 5.9034e-04
Loss = 1.4387e-02, PNorm = 129.0114, GNorm = 0.2434, lr_0 = 5.8993e-04
Loss = 1.6593e-02, PNorm = 129.0489, GNorm = 0.3130, lr_0 = 5.8953e-04
Loss = 1.6462e-02, PNorm = 129.0858, GNorm = 0.5263, lr_0 = 5.8913e-04
Loss = 2.1392e-02, PNorm = 129.1167, GNorm = 0.5392, lr_0 = 5.8872e-04
Loss = 1.9934e-02, PNorm = 129.1557, GNorm = 0.2993, lr_0 = 5.8832e-04
Loss = 1.8282e-02, PNorm = 129.1921, GNorm = 0.3384, lr_0 = 5.8792e-04
Loss = 2.0779e-02, PNorm = 129.2297, GNorm = 0.3131, lr_0 = 5.8751e-04
Loss = 1.6806e-02, PNorm = 129.2658, GNorm = 0.2362, lr_0 = 5.8711e-04
Loss = 1.9648e-02, PNorm = 129.2942, GNorm = 0.2657, lr_0 = 5.8671e-04
Loss = 1.6787e-02, PNorm = 129.3286, GNorm = 0.3629, lr_0 = 5.8631e-04
Loss = 1.8537e-02, PNorm = 129.3657, GNorm = 0.1906, lr_0 = 5.8591e-04
Loss = 1.7855e-02, PNorm = 129.4034, GNorm = 0.2317, lr_0 = 5.8550e-04
Loss = 1.8232e-02, PNorm = 129.4390, GNorm = 0.3279, lr_0 = 5.8510e-04
Loss = 2.0173e-02, PNorm = 129.4697, GNorm = 0.6883, lr_0 = 5.8470e-04
Loss = 1.6403e-02, PNorm = 129.5071, GNorm = 0.7723, lr_0 = 5.8430e-04
Loss = 1.8467e-02, PNorm = 129.5402, GNorm = 0.4771, lr_0 = 5.8390e-04
Loss = 2.0408e-02, PNorm = 129.5785, GNorm = 0.6541, lr_0 = 5.8350e-04
Loss = 1.9906e-02, PNorm = 129.6196, GNorm = 0.1964, lr_0 = 5.8310e-04
Loss = 1.8332e-02, PNorm = 129.6561, GNorm = 0.2191, lr_0 = 5.8270e-04
Loss = 1.8028e-02, PNorm = 129.6974, GNorm = 0.4403, lr_0 = 5.8230e-04
Loss = 1.7880e-02, PNorm = 129.7343, GNorm = 0.3684, lr_0 = 5.8190e-04
Loss = 1.7308e-02, PNorm = 129.7707, GNorm = 0.2698, lr_0 = 5.8151e-04
Loss = 1.9371e-02, PNorm = 129.8101, GNorm = 0.2758, lr_0 = 5.8111e-04
Loss = 1.8447e-02, PNorm = 129.8538, GNorm = 0.3395, lr_0 = 5.8071e-04
Loss = 1.6993e-02, PNorm = 129.8899, GNorm = 0.2412, lr_0 = 5.8031e-04
Loss = 1.7261e-02, PNorm = 129.9290, GNorm = 0.2918, lr_0 = 5.7991e-04
Loss = 1.8405e-02, PNorm = 129.9664, GNorm = 0.5427, lr_0 = 5.7952e-04
Loss = 2.0627e-02, PNorm = 130.0086, GNorm = 0.6613, lr_0 = 5.7912e-04
Loss = 1.9040e-02, PNorm = 130.0493, GNorm = 0.3160, lr_0 = 5.7872e-04
Loss = 1.7838e-02, PNorm = 130.0943, GNorm = 0.2861, lr_0 = 5.7833e-04
Loss = 1.8784e-02, PNorm = 130.1349, GNorm = 0.4907, lr_0 = 5.7793e-04
Loss = 1.9798e-02, PNorm = 130.1755, GNorm = 0.4535, lr_0 = 5.7753e-04
Loss = 2.0431e-02, PNorm = 130.2178, GNorm = 0.2970, lr_0 = 5.7714e-04
Loss = 1.6783e-02, PNorm = 130.2576, GNorm = 0.1492, lr_0 = 5.7674e-04
Loss = 1.6930e-02, PNorm = 130.2962, GNorm = 0.1724, lr_0 = 5.7635e-04
Loss = 1.6304e-02, PNorm = 130.3387, GNorm = 0.1526, lr_0 = 5.7595e-04
Loss = 1.6487e-02, PNorm = 130.3800, GNorm = 0.6500, lr_0 = 5.7556e-04
Loss = 1.6985e-02, PNorm = 130.4195, GNorm = 0.6167, lr_0 = 5.7516e-04
Loss = 1.7689e-02, PNorm = 130.4531, GNorm = 0.1756, lr_0 = 5.7477e-04
Loss = 1.6586e-02, PNorm = 130.4920, GNorm = 0.2938, lr_0 = 5.7438e-04
Loss = 1.9373e-02, PNorm = 130.5375, GNorm = 0.7275, lr_0 = 5.7398e-04
Loss = 1.7780e-02, PNorm = 130.5732, GNorm = 0.9020, lr_0 = 5.7359e-04
Loss = 1.8382e-02, PNorm = 130.6034, GNorm = 0.3396, lr_0 = 5.7320e-04
Loss = 1.7962e-02, PNorm = 130.6467, GNorm = 0.2714, lr_0 = 5.7280e-04
Loss = 2.0971e-02, PNorm = 130.6998, GNorm = 0.1893, lr_0 = 5.7241e-04
Loss = 1.7783e-02, PNorm = 130.7459, GNorm = 0.2052, lr_0 = 5.7202e-04
Loss = 2.0103e-02, PNorm = 130.7917, GNorm = 0.2937, lr_0 = 5.7163e-04
Loss = 1.7056e-02, PNorm = 130.8328, GNorm = 0.2741, lr_0 = 5.7124e-04
Loss = 2.0517e-02, PNorm = 130.8743, GNorm = 0.2032, lr_0 = 5.7084e-04
Loss = 1.7506e-02, PNorm = 130.9132, GNorm = 0.2650, lr_0 = 5.7045e-04
Loss = 2.0807e-02, PNorm = 130.9549, GNorm = 0.2038, lr_0 = 5.7006e-04
Loss = 1.4367e-02, PNorm = 131.0020, GNorm = 0.2668, lr_0 = 5.6967e-04
Loss = 2.5108e-02, PNorm = 131.0434, GNorm = 0.8305, lr_0 = 5.6928e-04
Loss = 2.0569e-02, PNorm = 131.0889, GNorm = 0.2517, lr_0 = 5.6889e-04
Loss = 2.0303e-02, PNorm = 131.1342, GNorm = 0.2693, lr_0 = 5.6850e-04
Loss = 2.2440e-02, PNorm = 131.1900, GNorm = 0.3014, lr_0 = 5.6811e-04
Loss = 1.8459e-02, PNorm = 131.2370, GNorm = 0.1430, lr_0 = 5.6772e-04
Loss = 1.9227e-02, PNorm = 131.2775, GNorm = 0.2151, lr_0 = 5.6733e-04
Loss = 1.8577e-02, PNorm = 131.3210, GNorm = 0.1706, lr_0 = 5.6695e-04
Loss = 1.8878e-02, PNorm = 131.3619, GNorm = 0.2010, lr_0 = 5.6656e-04
Loss = 2.0007e-02, PNorm = 131.4107, GNorm = 0.3408, lr_0 = 5.6617e-04
Loss = 1.5876e-02, PNorm = 131.4502, GNorm = 0.2698, lr_0 = 5.6578e-04
Loss = 1.6521e-02, PNorm = 131.4869, GNorm = 0.2132, lr_0 = 5.6539e-04
Loss = 1.7396e-02, PNorm = 131.5255, GNorm = 0.6123, lr_0 = 5.6501e-04
Loss = 2.0480e-02, PNorm = 131.5628, GNorm = 0.3049, lr_0 = 5.6462e-04
Loss = 1.5673e-02, PNorm = 131.5983, GNorm = 0.1365, lr_0 = 5.6423e-04
Loss = 1.8102e-02, PNorm = 131.6314, GNorm = 0.1464, lr_0 = 5.6385e-04
Loss = 1.9986e-02, PNorm = 131.6733, GNorm = 0.2561, lr_0 = 5.6346e-04
Loss = 1.8629e-02, PNorm = 131.7184, GNorm = 0.2169, lr_0 = 5.6307e-04
Loss = 1.8684e-02, PNorm = 131.7613, GNorm = 0.4289, lr_0 = 5.6269e-04
Loss = 1.8682e-02, PNorm = 131.8095, GNorm = 0.7471, lr_0 = 5.6230e-04
Validation mae = 0.484375
Epoch 9
Loss = 1.6162e-02, PNorm = 131.8461, GNorm = 0.4364, lr_0 = 5.6192e-04
Loss = 1.5101e-02, PNorm = 131.8692, GNorm = 0.1886, lr_0 = 5.6153e-04
Loss = 1.4011e-02, PNorm = 131.8931, GNorm = 0.2263, lr_0 = 5.6115e-04
Loss = 1.4582e-02, PNorm = 131.9212, GNorm = 0.2115, lr_0 = 5.6076e-04
Loss = 1.7759e-02, PNorm = 131.9433, GNorm = 0.1961, lr_0 = 5.6038e-04
Loss = 1.5459e-02, PNorm = 131.9689, GNorm = 0.4226, lr_0 = 5.6000e-04
Loss = 1.4658e-02, PNorm = 131.9943, GNorm = 0.3546, lr_0 = 5.5961e-04
Loss = 1.6129e-02, PNorm = 132.0203, GNorm = 0.7320, lr_0 = 5.5923e-04
Loss = 1.4222e-02, PNorm = 132.0440, GNorm = 0.1866, lr_0 = 5.5885e-04
Loss = 1.5277e-02, PNorm = 132.0724, GNorm = 0.3427, lr_0 = 5.5846e-04
Loss = 1.4748e-02, PNorm = 132.1023, GNorm = 0.2061, lr_0 = 5.5808e-04
Loss = 1.9024e-02, PNorm = 132.1330, GNorm = 0.5000, lr_0 = 5.5770e-04
Loss = 1.8143e-02, PNorm = 132.1745, GNorm = 0.8908, lr_0 = 5.5732e-04
Loss = 1.4146e-02, PNorm = 132.2113, GNorm = 0.4619, lr_0 = 5.5693e-04
Loss = 1.4381e-02, PNorm = 132.2370, GNorm = 0.3087, lr_0 = 5.5655e-04
Loss = 1.4852e-02, PNorm = 132.2661, GNorm = 0.3044, lr_0 = 5.5617e-04
Loss = 1.4211e-02, PNorm = 132.2967, GNorm = 0.1763, lr_0 = 5.5579e-04
Loss = 1.6644e-02, PNorm = 132.3306, GNorm = 0.2946, lr_0 = 5.5541e-04
Loss = 1.6596e-02, PNorm = 132.3669, GNorm = 0.2082, lr_0 = 5.5503e-04
Loss = 1.7113e-02, PNorm = 132.4023, GNorm = 0.4060, lr_0 = 5.5465e-04
Loss = 1.3048e-02, PNorm = 132.4295, GNorm = 0.6100, lr_0 = 5.5427e-04
Loss = 1.6573e-02, PNorm = 132.4569, GNorm = 0.2909, lr_0 = 5.5389e-04
Loss = 1.5114e-02, PNorm = 132.4897, GNorm = 0.2008, lr_0 = 5.5351e-04
Loss = 1.5509e-02, PNorm = 132.5283, GNorm = 0.4544, lr_0 = 5.5313e-04
Loss = 1.5413e-02, PNorm = 132.5605, GNorm = 0.1363, lr_0 = 5.5275e-04
Loss = 1.7706e-02, PNorm = 132.5947, GNorm = 0.9401, lr_0 = 5.5237e-04
Loss = 1.5603e-02, PNorm = 132.6239, GNorm = 0.5792, lr_0 = 5.5199e-04
Loss = 1.3991e-02, PNorm = 132.6627, GNorm = 0.3123, lr_0 = 5.5162e-04
Loss = 1.3390e-02, PNorm = 132.6939, GNorm = 0.3041, lr_0 = 5.5124e-04
Loss = 1.7958e-02, PNorm = 132.7235, GNorm = 0.2447, lr_0 = 5.5086e-04
Loss = 1.7019e-02, PNorm = 132.7554, GNorm = 0.3352, lr_0 = 5.5048e-04
Loss = 1.3787e-02, PNorm = 132.7844, GNorm = 0.6894, lr_0 = 5.5011e-04
Loss = 1.3959e-02, PNorm = 132.8157, GNorm = 0.2220, lr_0 = 5.4973e-04
Loss = 1.2534e-02, PNorm = 132.8395, GNorm = 0.4372, lr_0 = 5.4935e-04
Loss = 1.3905e-02, PNorm = 132.8702, GNorm = 0.2526, lr_0 = 5.4898e-04
Loss = 1.3882e-02, PNorm = 132.8990, GNorm = 0.3510, lr_0 = 5.4860e-04
Loss = 1.2678e-02, PNorm = 132.9255, GNorm = 0.2138, lr_0 = 5.4822e-04
Loss = 1.5003e-02, PNorm = 132.9513, GNorm = 0.5301, lr_0 = 5.4785e-04
Loss = 1.6157e-02, PNorm = 132.9780, GNorm = 0.1457, lr_0 = 5.4747e-04
Loss = 1.3261e-02, PNorm = 133.0096, GNorm = 0.4276, lr_0 = 5.4710e-04
Loss = 1.5055e-02, PNorm = 133.0400, GNorm = 0.4680, lr_0 = 5.4672e-04
Loss = 1.6587e-02, PNorm = 133.0651, GNorm = 0.3636, lr_0 = 5.4635e-04
Loss = 1.7670e-02, PNorm = 133.1029, GNorm = 0.3323, lr_0 = 5.4597e-04
Loss = 1.6406e-02, PNorm = 133.1377, GNorm = 0.3276, lr_0 = 5.4560e-04
Loss = 1.3951e-02, PNorm = 133.1709, GNorm = 0.4677, lr_0 = 5.4523e-04
Loss = 1.5427e-02, PNorm = 133.2024, GNorm = 0.3473, lr_0 = 5.4485e-04
Loss = 1.3776e-02, PNorm = 133.2330, GNorm = 0.2992, lr_0 = 5.4448e-04
Loss = 1.5787e-02, PNorm = 133.2592, GNorm = 0.4252, lr_0 = 5.4411e-04
Loss = 1.3662e-02, PNorm = 133.2926, GNorm = 0.3178, lr_0 = 5.4373e-04
Loss = 1.4001e-02, PNorm = 133.3264, GNorm = 0.2870, lr_0 = 5.4336e-04
Loss = 1.7811e-02, PNorm = 133.3614, GNorm = 0.2568, lr_0 = 5.4299e-04
Loss = 1.6107e-02, PNorm = 133.3997, GNorm = 0.2688, lr_0 = 5.4262e-04
Loss = 1.6190e-02, PNorm = 133.4274, GNorm = 0.2423, lr_0 = 5.4225e-04
Loss = 1.5364e-02, PNorm = 133.4593, GNorm = 0.2359, lr_0 = 5.4187e-04
Loss = 1.4942e-02, PNorm = 133.4901, GNorm = 0.2851, lr_0 = 5.4150e-04
Loss = 1.3092e-02, PNorm = 133.5261, GNorm = 0.4414, lr_0 = 5.4113e-04
Loss = 1.4068e-02, PNorm = 133.5554, GNorm = 0.1904, lr_0 = 5.4076e-04
Loss = 1.4206e-02, PNorm = 133.5862, GNorm = 0.2314, lr_0 = 5.4039e-04
Loss = 1.6179e-02, PNorm = 133.6189, GNorm = 0.2658, lr_0 = 5.4002e-04
Loss = 1.3058e-02, PNorm = 133.6486, GNorm = 0.3252, lr_0 = 5.3965e-04
Loss = 1.7187e-02, PNorm = 133.6742, GNorm = 0.4219, lr_0 = 5.3928e-04
Loss = 1.7014e-02, PNorm = 133.7000, GNorm = 0.1281, lr_0 = 5.3891e-04
Loss = 1.3075e-02, PNorm = 133.7264, GNorm = 0.3770, lr_0 = 5.3854e-04
Loss = 1.6356e-02, PNorm = 133.7573, GNorm = 0.4728, lr_0 = 5.3817e-04
Loss = 1.6023e-02, PNorm = 133.7927, GNorm = 0.2860, lr_0 = 5.3781e-04
Loss = 1.3079e-02, PNorm = 133.8263, GNorm = 0.2680, lr_0 = 5.3744e-04
Loss = 1.4766e-02, PNorm = 133.8532, GNorm = 0.3161, lr_0 = 5.3707e-04
Loss = 1.5280e-02, PNorm = 133.8827, GNorm = 0.6184, lr_0 = 5.3670e-04
Loss = 1.3449e-02, PNorm = 133.9105, GNorm = 0.3205, lr_0 = 5.3633e-04
Loss = 1.3510e-02, PNorm = 133.9461, GNorm = 0.2304, lr_0 = 5.3597e-04
Loss = 1.5325e-02, PNorm = 133.9852, GNorm = 0.2128, lr_0 = 5.3560e-04
Loss = 1.5227e-02, PNorm = 134.0245, GNorm = 0.2344, lr_0 = 5.3523e-04
Loss = 1.4394e-02, PNorm = 134.0637, GNorm = 0.3601, lr_0 = 5.3486e-04
Loss = 1.5743e-02, PNorm = 134.1033, GNorm = 0.1402, lr_0 = 5.3450e-04
Loss = 1.8550e-02, PNorm = 134.1332, GNorm = 0.2243, lr_0 = 5.3413e-04
Loss = 1.4063e-02, PNorm = 134.1678, GNorm = 0.4770, lr_0 = 5.3377e-04
Loss = 1.4042e-02, PNorm = 134.2012, GNorm = 0.3183, lr_0 = 5.3340e-04
Loss = 1.3052e-02, PNorm = 134.2376, GNorm = 0.3039, lr_0 = 5.3304e-04
Loss = 1.6593e-02, PNorm = 134.2724, GNorm = 0.3299, lr_0 = 5.3267e-04
Loss = 1.3292e-02, PNorm = 134.3052, GNorm = 0.2899, lr_0 = 5.3231e-04
Loss = 1.3448e-02, PNorm = 134.3363, GNorm = 0.2096, lr_0 = 5.3194e-04
Loss = 1.5769e-02, PNorm = 134.3698, GNorm = 0.1211, lr_0 = 5.3158e-04
Loss = 1.3564e-02, PNorm = 134.4018, GNorm = 0.4433, lr_0 = 5.3121e-04
Loss = 1.3535e-02, PNorm = 134.4324, GNorm = 0.1194, lr_0 = 5.3085e-04
Loss = 1.3792e-02, PNorm = 134.4678, GNorm = 0.1159, lr_0 = 5.3048e-04
Loss = 1.5026e-02, PNorm = 134.4987, GNorm = 0.4716, lr_0 = 5.3012e-04
Loss = 1.7966e-02, PNorm = 134.5295, GNorm = 0.6332, lr_0 = 5.2976e-04
Loss = 1.5425e-02, PNorm = 134.5593, GNorm = 0.4345, lr_0 = 5.2939e-04
Loss = 1.5752e-02, PNorm = 134.5880, GNorm = 0.2410, lr_0 = 5.2903e-04
Loss = 1.6132e-02, PNorm = 134.6181, GNorm = 0.2353, lr_0 = 5.2867e-04
Loss = 1.2685e-02, PNorm = 134.6551, GNorm = 0.1973, lr_0 = 5.2831e-04
Loss = 1.2747e-02, PNorm = 134.6900, GNorm = 0.2900, lr_0 = 5.2795e-04
Loss = 1.5231e-02, PNorm = 134.7251, GNorm = 0.1745, lr_0 = 5.2758e-04
Loss = 1.6828e-02, PNorm = 134.7609, GNorm = 0.2461, lr_0 = 5.2722e-04
Loss = 1.7444e-02, PNorm = 134.7944, GNorm = 0.3654, lr_0 = 5.2686e-04
Loss = 1.3970e-02, PNorm = 134.8372, GNorm = 0.3604, lr_0 = 5.2650e-04
Loss = 1.3758e-02, PNorm = 134.8748, GNorm = 0.3838, lr_0 = 5.2614e-04
Loss = 1.6718e-02, PNorm = 134.9117, GNorm = 0.2967, lr_0 = 5.2578e-04
Loss = 1.4765e-02, PNorm = 134.9451, GNorm = 0.4416, lr_0 = 5.2542e-04
Loss = 1.2619e-02, PNorm = 134.9831, GNorm = 0.3916, lr_0 = 5.2506e-04
Loss = 1.7113e-02, PNorm = 135.0145, GNorm = 0.7337, lr_0 = 5.2470e-04
Loss = 1.5464e-02, PNorm = 135.0579, GNorm = 0.5871, lr_0 = 5.2434e-04
Loss = 1.2566e-02, PNorm = 135.0941, GNorm = 0.3755, lr_0 = 5.2398e-04
Loss = 1.4992e-02, PNorm = 135.1283, GNorm = 0.2819, lr_0 = 5.2362e-04
Loss = 1.3461e-02, PNorm = 135.1668, GNorm = 0.2714, lr_0 = 5.2326e-04
Loss = 1.4678e-02, PNorm = 135.2019, GNorm = 0.5172, lr_0 = 5.2290e-04
Loss = 1.5274e-02, PNorm = 135.2369, GNorm = 0.4272, lr_0 = 5.2255e-04
Loss = 1.4320e-02, PNorm = 135.2768, GNorm = 0.2867, lr_0 = 5.2219e-04
Loss = 1.5145e-02, PNorm = 135.3188, GNorm = 0.2629, lr_0 = 5.2183e-04
Loss = 1.7426e-02, PNorm = 135.3595, GNorm = 1.0171, lr_0 = 5.2147e-04
Loss = 1.3506e-02, PNorm = 135.3979, GNorm = 0.3232, lr_0 = 5.2112e-04
Loss = 1.6613e-02, PNorm = 135.4349, GNorm = 0.2202, lr_0 = 5.2076e-04
Loss = 1.5572e-02, PNorm = 135.4657, GNorm = 0.3664, lr_0 = 5.2040e-04
Loss = 1.5428e-02, PNorm = 135.4996, GNorm = 0.1780, lr_0 = 5.2005e-04
Loss = 1.5342e-02, PNorm = 135.5277, GNorm = 0.4065, lr_0 = 5.1969e-04
Loss = 1.5356e-02, PNorm = 135.5660, GNorm = 0.2500, lr_0 = 5.1933e-04
Loss = 1.6612e-02, PNorm = 135.6052, GNorm = 0.5291, lr_0 = 5.1898e-04
Loss = 1.6629e-02, PNorm = 135.6413, GNorm = 0.2879, lr_0 = 5.1862e-04
Loss = 1.3972e-02, PNorm = 135.6815, GNorm = 0.1397, lr_0 = 5.1827e-04
Loss = 1.6686e-02, PNorm = 135.7165, GNorm = 0.4460, lr_0 = 5.1791e-04
Validation mae = 0.483873
Epoch 10
Loss = 1.4453e-02, PNorm = 135.7469, GNorm = 0.3329, lr_0 = 5.1756e-04
Loss = 1.3590e-02, PNorm = 135.7710, GNorm = 0.1694, lr_0 = 5.1720e-04
Loss = 1.3149e-02, PNorm = 135.7923, GNorm = 0.2069, lr_0 = 5.1685e-04
Loss = 1.6122e-02, PNorm = 135.8191, GNorm = 0.3027, lr_0 = 5.1649e-04
Loss = 1.1651e-02, PNorm = 135.8468, GNorm = 0.3503, lr_0 = 5.1614e-04
Loss = 1.3763e-02, PNorm = 135.8748, GNorm = 0.2929, lr_0 = 5.1579e-04
Loss = 1.4650e-02, PNorm = 135.9062, GNorm = 0.1357, lr_0 = 5.1543e-04
Loss = 1.1459e-02, PNorm = 135.9325, GNorm = 0.4433, lr_0 = 5.1508e-04
Loss = 1.3247e-02, PNorm = 135.9557, GNorm = 0.1499, lr_0 = 5.1473e-04
Loss = 1.1987e-02, PNorm = 135.9803, GNorm = 0.3696, lr_0 = 5.1437e-04
Loss = 1.2057e-02, PNorm = 136.0054, GNorm = 0.4002, lr_0 = 5.1402e-04
Loss = 1.3436e-02, PNorm = 136.0351, GNorm = 0.4227, lr_0 = 5.1367e-04
Loss = 1.4014e-02, PNorm = 136.0581, GNorm = 0.1700, lr_0 = 5.1332e-04
Loss = 1.1138e-02, PNorm = 136.0873, GNorm = 0.4355, lr_0 = 5.1297e-04
Loss = 1.1156e-02, PNorm = 136.1131, GNorm = 0.2763, lr_0 = 5.1262e-04
Loss = 1.4179e-02, PNorm = 136.1342, GNorm = 0.1910, lr_0 = 5.1226e-04
Loss = 1.1251e-02, PNorm = 136.1546, GNorm = 0.1431, lr_0 = 5.1191e-04
Loss = 1.4468e-02, PNorm = 136.1793, GNorm = 0.2641, lr_0 = 5.1156e-04
Loss = 1.4669e-02, PNorm = 136.2037, GNorm = 0.5651, lr_0 = 5.1121e-04
Loss = 1.1638e-02, PNorm = 136.2291, GNorm = 0.1265, lr_0 = 5.1086e-04
Loss = 1.1560e-02, PNorm = 136.2522, GNorm = 0.1794, lr_0 = 5.1051e-04
Loss = 1.3890e-02, PNorm = 136.2799, GNorm = 0.2699, lr_0 = 5.1016e-04
Loss = 1.1885e-02, PNorm = 136.3085, GNorm = 0.5549, lr_0 = 5.0981e-04
Loss = 1.2126e-02, PNorm = 136.3304, GNorm = 0.2588, lr_0 = 5.0946e-04
Loss = 1.0938e-02, PNorm = 136.3498, GNorm = 0.6587, lr_0 = 5.0911e-04
Loss = 1.3709e-02, PNorm = 136.3729, GNorm = 0.5481, lr_0 = 5.0877e-04
Loss = 1.1708e-02, PNorm = 136.3966, GNorm = 0.1862, lr_0 = 5.0842e-04
Loss = 1.2226e-02, PNorm = 136.4255, GNorm = 0.3327, lr_0 = 5.0807e-04
Loss = 1.2158e-02, PNorm = 136.4485, GNorm = 0.5208, lr_0 = 5.0772e-04
Loss = 1.2116e-02, PNorm = 136.4761, GNorm = 0.1683, lr_0 = 5.0737e-04
Loss = 9.4336e-03, PNorm = 136.5018, GNorm = 0.4437, lr_0 = 5.0703e-04
Loss = 1.1115e-02, PNorm = 136.5266, GNorm = 0.1807, lr_0 = 5.0668e-04
Loss = 1.0547e-02, PNorm = 136.5525, GNorm = 0.3339, lr_0 = 5.0633e-04
Loss = 1.0883e-02, PNorm = 136.5730, GNorm = 0.2216, lr_0 = 5.0598e-04
Loss = 1.1456e-02, PNorm = 136.5993, GNorm = 0.1517, lr_0 = 5.0564e-04
Loss = 1.3006e-02, PNorm = 136.6251, GNorm = 0.1466, lr_0 = 5.0529e-04
Loss = 1.4464e-02, PNorm = 136.6486, GNorm = 0.3784, lr_0 = 5.0494e-04
Loss = 1.1136e-02, PNorm = 136.6782, GNorm = 0.2087, lr_0 = 5.0460e-04
Loss = 1.2592e-02, PNorm = 136.7074, GNorm = 0.2194, lr_0 = 5.0425e-04
Loss = 1.1577e-02, PNorm = 136.7365, GNorm = 0.2491, lr_0 = 5.0391e-04
Loss = 1.0172e-02, PNorm = 136.7641, GNorm = 0.2637, lr_0 = 5.0356e-04
Loss = 1.1574e-02, PNorm = 136.7901, GNorm = 0.1277, lr_0 = 5.0322e-04
Loss = 1.0904e-02, PNorm = 136.8197, GNorm = 0.4103, lr_0 = 5.0287e-04
Loss = 1.3029e-02, PNorm = 136.8453, GNorm = 0.2606, lr_0 = 5.0253e-04
Loss = 1.1341e-02, PNorm = 136.8756, GNorm = 0.1044, lr_0 = 5.0218e-04
Loss = 1.0672e-02, PNorm = 136.9045, GNorm = 0.3080, lr_0 = 5.0184e-04
Loss = 1.1410e-02, PNorm = 136.9298, GNorm = 0.1593, lr_0 = 5.0150e-04
Loss = 1.2049e-02, PNorm = 136.9558, GNorm = 0.8610, lr_0 = 5.0115e-04
Loss = 1.1226e-02, PNorm = 136.9795, GNorm = 0.0891, lr_0 = 5.0081e-04
Loss = 1.1411e-02, PNorm = 137.0060, GNorm = 0.1021, lr_0 = 5.0047e-04
Loss = 1.4714e-02, PNorm = 137.0302, GNorm = 0.3292, lr_0 = 5.0012e-04
Loss = 1.3363e-02, PNorm = 137.0553, GNorm = 0.4749, lr_0 = 4.9978e-04
Loss = 1.3235e-02, PNorm = 137.0770, GNorm = 0.4608, lr_0 = 4.9944e-04
Loss = 1.4959e-02, PNorm = 137.1074, GNorm = 0.4544, lr_0 = 4.9910e-04
Loss = 1.0404e-02, PNorm = 137.1338, GNorm = 0.1409, lr_0 = 4.9875e-04
Loss = 1.3252e-02, PNorm = 137.1563, GNorm = 0.1539, lr_0 = 4.9841e-04
Loss = 1.2141e-02, PNorm = 137.1848, GNorm = 0.1143, lr_0 = 4.9807e-04
Loss = 1.2092e-02, PNorm = 137.2180, GNorm = 0.3356, lr_0 = 4.9773e-04
Loss = 1.2491e-02, PNorm = 137.2476, GNorm = 0.1935, lr_0 = 4.9739e-04
Loss = 1.1311e-02, PNorm = 137.2777, GNorm = 0.1121, lr_0 = 4.9705e-04
Loss = 1.2784e-02, PNorm = 137.3081, GNorm = 0.1206, lr_0 = 4.9671e-04
Loss = 1.2047e-02, PNorm = 137.3347, GNorm = 0.1591, lr_0 = 4.9637e-04
Loss = 1.7079e-02, PNorm = 137.3584, GNorm = 0.2612, lr_0 = 4.9603e-04
Loss = 1.2710e-02, PNorm = 137.3911, GNorm = 0.3311, lr_0 = 4.9569e-04
Loss = 1.1874e-02, PNorm = 137.4165, GNorm = 0.4911, lr_0 = 4.9535e-04
Loss = 1.0578e-02, PNorm = 137.4417, GNorm = 0.1710, lr_0 = 4.9501e-04
Loss = 1.1357e-02, PNorm = 137.4637, GNorm = 0.2543, lr_0 = 4.9467e-04
Loss = 1.3188e-02, PNorm = 137.4926, GNorm = 0.4454, lr_0 = 4.9433e-04
Loss = 1.0420e-02, PNorm = 137.5248, GNorm = 0.1563, lr_0 = 4.9399e-04
Loss = 1.3770e-02, PNorm = 137.5565, GNorm = 0.3954, lr_0 = 4.9365e-04
Loss = 1.3717e-02, PNorm = 137.5824, GNorm = 0.7493, lr_0 = 4.9332e-04
Loss = 1.0938e-02, PNorm = 137.6103, GNorm = 0.5147, lr_0 = 4.9298e-04
Loss = 1.2617e-02, PNorm = 137.6342, GNorm = 0.2179, lr_0 = 4.9264e-04
Loss = 1.1296e-02, PNorm = 137.6612, GNorm = 0.1469, lr_0 = 4.9230e-04
Loss = 1.1363e-02, PNorm = 137.6878, GNorm = 0.2901, lr_0 = 4.9197e-04
Loss = 1.0770e-02, PNorm = 137.7206, GNorm = 0.2147, lr_0 = 4.9163e-04
Loss = 1.0458e-02, PNorm = 137.7441, GNorm = 0.2068, lr_0 = 4.9129e-04
Loss = 1.1567e-02, PNorm = 137.7672, GNorm = 0.2236, lr_0 = 4.9095e-04
Loss = 1.3932e-02, PNorm = 137.7908, GNorm = 0.2049, lr_0 = 4.9062e-04
Loss = 1.0705e-02, PNorm = 137.8220, GNorm = 0.3269, lr_0 = 4.9028e-04
Loss = 1.1176e-02, PNorm = 137.8557, GNorm = 0.3274, lr_0 = 4.8995e-04
Loss = 1.1037e-02, PNorm = 137.8887, GNorm = 0.2652, lr_0 = 4.8961e-04
Loss = 1.1457e-02, PNorm = 137.9189, GNorm = 0.4302, lr_0 = 4.8928e-04
Loss = 1.2412e-02, PNorm = 137.9441, GNorm = 0.5028, lr_0 = 4.8894e-04
Loss = 1.6724e-02, PNorm = 137.9788, GNorm = 0.3567, lr_0 = 4.8861e-04
Loss = 1.3102e-02, PNorm = 138.0103, GNorm = 0.4841, lr_0 = 4.8827e-04
Loss = 1.1570e-02, PNorm = 138.0375, GNorm = 0.5536, lr_0 = 4.8794e-04
Loss = 1.4337e-02, PNorm = 138.0678, GNorm = 0.2843, lr_0 = 4.8760e-04
Loss = 1.2095e-02, PNorm = 138.0957, GNorm = 0.4170, lr_0 = 4.8727e-04
Loss = 1.5782e-02, PNorm = 138.1299, GNorm = 0.4378, lr_0 = 4.8693e-04
Loss = 1.0983e-02, PNorm = 138.1630, GNorm = 0.1785, lr_0 = 4.8660e-04
Loss = 1.0674e-02, PNorm = 138.1926, GNorm = 0.2383, lr_0 = 4.8627e-04
Loss = 1.1907e-02, PNorm = 138.2175, GNorm = 0.3446, lr_0 = 4.8593e-04
Loss = 1.1574e-02, PNorm = 138.2444, GNorm = 0.1375, lr_0 = 4.8560e-04
Loss = 1.1421e-02, PNorm = 138.2711, GNorm = 0.4172, lr_0 = 4.8527e-04
Loss = 1.2054e-02, PNorm = 138.2984, GNorm = 0.4592, lr_0 = 4.8494e-04
Loss = 1.0043e-02, PNorm = 138.3252, GNorm = 0.2136, lr_0 = 4.8460e-04
Loss = 1.1940e-02, PNorm = 138.3459, GNorm = 0.4664, lr_0 = 4.8427e-04
Loss = 1.3073e-02, PNorm = 138.3679, GNorm = 0.1588, lr_0 = 4.8394e-04
Loss = 1.2344e-02, PNorm = 138.3967, GNorm = 0.2477, lr_0 = 4.8361e-04
Loss = 1.7744e-02, PNorm = 138.4265, GNorm = 0.3791, lr_0 = 4.8328e-04
Loss = 1.4258e-02, PNorm = 138.4608, GNorm = 1.0880, lr_0 = 4.8295e-04
Loss = 1.1786e-02, PNorm = 138.4902, GNorm = 0.6125, lr_0 = 4.8262e-04
Loss = 1.4907e-02, PNorm = 138.5245, GNorm = 0.3584, lr_0 = 4.8228e-04
Loss = 1.3003e-02, PNorm = 138.5616, GNorm = 0.1149, lr_0 = 4.8195e-04
Loss = 1.1558e-02, PNorm = 138.5934, GNorm = 0.4353, lr_0 = 4.8162e-04
Loss = 1.2376e-02, PNorm = 138.6222, GNorm = 0.2726, lr_0 = 4.8129e-04
Loss = 1.4062e-02, PNorm = 138.6487, GNorm = 0.3799, lr_0 = 4.8096e-04
Loss = 1.0869e-02, PNorm = 138.6747, GNorm = 0.1247, lr_0 = 4.8064e-04
Loss = 1.3811e-02, PNorm = 138.7044, GNorm = 0.4831, lr_0 = 4.8031e-04
Loss = 1.3194e-02, PNorm = 138.7383, GNorm = 0.3934, lr_0 = 4.7998e-04
Loss = 1.4763e-02, PNorm = 138.7781, GNorm = 0.3192, lr_0 = 4.7965e-04
Loss = 1.0231e-02, PNorm = 138.8152, GNorm = 0.2258, lr_0 = 4.7932e-04
Loss = 1.2142e-02, PNorm = 138.8479, GNorm = 0.2266, lr_0 = 4.7899e-04
Loss = 1.4147e-02, PNorm = 138.8757, GNorm = 0.7541, lr_0 = 4.7866e-04
Loss = 1.1208e-02, PNorm = 138.9041, GNorm = 0.2473, lr_0 = 4.7833e-04
Loss = 1.1823e-02, PNorm = 138.9333, GNorm = 0.4815, lr_0 = 4.7801e-04
Loss = 1.3728e-02, PNorm = 138.9657, GNorm = 0.3322, lr_0 = 4.7768e-04
Loss = 1.3140e-02, PNorm = 138.9966, GNorm = 0.1122, lr_0 = 4.7735e-04
Loss = 1.2019e-02, PNorm = 139.0233, GNorm = 0.1851, lr_0 = 4.7703e-04
Validation mae = 0.482328
Epoch 11
Loss = 1.1081e-02, PNorm = 139.0513, GNorm = 0.7202, lr_0 = 4.7670e-04
Loss = 1.2896e-02, PNorm = 139.0687, GNorm = 0.1978, lr_0 = 4.7637e-04
Loss = 1.1102e-02, PNorm = 139.0869, GNorm = 0.1940, lr_0 = 4.7605e-04
Loss = 1.2151e-02, PNorm = 139.1125, GNorm = 0.1687, lr_0 = 4.7572e-04
Loss = 1.1450e-02, PNorm = 139.1401, GNorm = 0.4114, lr_0 = 4.7539e-04
Loss = 1.1060e-02, PNorm = 139.1602, GNorm = 0.4289, lr_0 = 4.7507e-04
Loss = 1.0806e-02, PNorm = 139.1816, GNorm = 0.2552, lr_0 = 4.7474e-04
Loss = 1.2030e-02, PNorm = 139.2030, GNorm = 0.1634, lr_0 = 4.7442e-04
Loss = 9.4274e-03, PNorm = 139.2267, GNorm = 0.1254, lr_0 = 4.7409e-04
Loss = 9.2924e-03, PNorm = 139.2502, GNorm = 0.3056, lr_0 = 4.7377e-04
Loss = 1.0524e-02, PNorm = 139.2739, GNorm = 0.3081, lr_0 = 4.7344e-04
Loss = 1.2825e-02, PNorm = 139.2969, GNorm = 0.2778, lr_0 = 4.7312e-04
Loss = 1.0641e-02, PNorm = 139.3192, GNorm = 0.4256, lr_0 = 4.7279e-04
Loss = 1.0068e-02, PNorm = 139.3440, GNorm = 0.3394, lr_0 = 4.7247e-04
Loss = 1.1069e-02, PNorm = 139.3714, GNorm = 0.1356, lr_0 = 4.7215e-04
Loss = 1.2397e-02, PNorm = 139.3949, GNorm = 0.1136, lr_0 = 4.7182e-04
Loss = 1.0679e-02, PNorm = 139.4182, GNorm = 0.2247, lr_0 = 4.7150e-04
Loss = 1.0888e-02, PNorm = 139.4329, GNorm = 0.0815, lr_0 = 4.7118e-04
Loss = 8.8628e-03, PNorm = 139.4473, GNorm = 0.2383, lr_0 = 4.7085e-04
Loss = 1.1570e-02, PNorm = 139.4668, GNorm = 0.0921, lr_0 = 4.7053e-04
Loss = 1.0620e-02, PNorm = 139.4888, GNorm = 0.2713, lr_0 = 4.7021e-04
Loss = 1.0719e-02, PNorm = 139.5138, GNorm = 0.1028, lr_0 = 4.6989e-04
Loss = 1.0586e-02, PNorm = 139.5361, GNorm = 0.3427, lr_0 = 4.6957e-04
Loss = 1.1113e-02, PNorm = 139.5612, GNorm = 0.1898, lr_0 = 4.6924e-04
Loss = 1.1286e-02, PNorm = 139.5827, GNorm = 0.4804, lr_0 = 4.6892e-04
Loss = 8.5611e-03, PNorm = 139.6059, GNorm = 0.4412, lr_0 = 4.6860e-04
Loss = 1.1304e-02, PNorm = 139.6300, GNorm = 0.2072, lr_0 = 4.6828e-04
Loss = 8.9288e-03, PNorm = 139.6526, GNorm = 0.1464, lr_0 = 4.6796e-04
Loss = 1.0428e-02, PNorm = 139.6745, GNorm = 0.1593, lr_0 = 4.6764e-04
Loss = 8.6221e-03, PNorm = 139.6979, GNorm = 0.1421, lr_0 = 4.6732e-04
Loss = 8.5657e-03, PNorm = 139.7241, GNorm = 0.2382, lr_0 = 4.6700e-04
Loss = 9.4600e-03, PNorm = 139.7445, GNorm = 0.2140, lr_0 = 4.6668e-04
Loss = 9.4980e-03, PNorm = 139.7606, GNorm = 0.3455, lr_0 = 4.6636e-04
Loss = 8.2086e-03, PNorm = 139.7790, GNorm = 0.3307, lr_0 = 4.6604e-04
Loss = 1.1063e-02, PNorm = 139.8054, GNorm = 0.1944, lr_0 = 4.6572e-04
Loss = 1.1007e-02, PNorm = 139.8320, GNorm = 0.1827, lr_0 = 4.6540e-04
Loss = 7.3809e-03, PNorm = 139.8507, GNorm = 0.2492, lr_0 = 4.6508e-04
Loss = 1.0724e-02, PNorm = 139.8688, GNorm = 0.4465, lr_0 = 4.6476e-04
Loss = 1.3016e-02, PNorm = 139.8896, GNorm = 0.1127, lr_0 = 4.6445e-04
Loss = 9.2081e-03, PNorm = 139.9173, GNorm = 0.3365, lr_0 = 4.6413e-04
Loss = 1.0306e-02, PNorm = 139.9380, GNorm = 0.1442, lr_0 = 4.6381e-04
Loss = 8.6797e-03, PNorm = 139.9567, GNorm = 0.0963, lr_0 = 4.6349e-04
Loss = 9.2103e-03, PNorm = 139.9764, GNorm = 0.2561, lr_0 = 4.6317e-04
Loss = 1.0962e-02, PNorm = 140.0032, GNorm = 0.1481, lr_0 = 4.6286e-04
Loss = 1.0269e-02, PNorm = 140.0279, GNorm = 0.1606, lr_0 = 4.6254e-04
Loss = 1.1517e-02, PNorm = 140.0502, GNorm = 0.1511, lr_0 = 4.6222e-04
Loss = 9.4903e-03, PNorm = 140.0676, GNorm = 0.2385, lr_0 = 4.6191e-04
Loss = 9.5352e-03, PNorm = 140.0841, GNorm = 0.1805, lr_0 = 4.6159e-04
Loss = 9.8863e-03, PNorm = 140.1049, GNorm = 0.2812, lr_0 = 4.6127e-04
Loss = 7.8601e-03, PNorm = 140.1273, GNorm = 0.2784, lr_0 = 4.6096e-04
Loss = 9.3117e-03, PNorm = 140.1552, GNorm = 0.1460, lr_0 = 4.6064e-04
Loss = 1.0104e-02, PNorm = 140.1852, GNorm = 0.1553, lr_0 = 4.6033e-04
Loss = 9.3038e-03, PNorm = 140.2050, GNorm = 0.2663, lr_0 = 4.6001e-04
Loss = 1.2623e-02, PNorm = 140.2287, GNorm = 0.5087, lr_0 = 4.5970e-04
Loss = 9.7915e-03, PNorm = 140.2547, GNorm = 0.4278, lr_0 = 4.5938e-04
Loss = 9.3450e-03, PNorm = 140.2797, GNorm = 0.1588, lr_0 = 4.5907e-04
Loss = 9.1398e-03, PNorm = 140.2992, GNorm = 0.1849, lr_0 = 4.5875e-04
Loss = 9.0743e-03, PNorm = 140.3203, GNorm = 0.1367, lr_0 = 4.5844e-04
Loss = 1.2294e-02, PNorm = 140.3420, GNorm = 0.2278, lr_0 = 4.5812e-04
Loss = 8.6329e-03, PNorm = 140.3648, GNorm = 0.1809, lr_0 = 4.5781e-04
Loss = 9.8535e-03, PNorm = 140.3866, GNorm = 0.1997, lr_0 = 4.5750e-04
Loss = 1.0432e-02, PNorm = 140.4069, GNorm = 0.2595, lr_0 = 4.5718e-04
Loss = 9.9600e-03, PNorm = 140.4267, GNorm = 0.1830, lr_0 = 4.5687e-04
Loss = 9.3874e-03, PNorm = 140.4452, GNorm = 0.1055, lr_0 = 4.5656e-04
Loss = 9.2803e-03, PNorm = 140.4649, GNorm = 0.6021, lr_0 = 4.5624e-04
Loss = 1.1373e-02, PNorm = 140.4881, GNorm = 0.2950, lr_0 = 4.5593e-04
Loss = 1.2405e-02, PNorm = 140.5161, GNorm = 0.3317, lr_0 = 4.5562e-04
Loss = 1.1311e-02, PNorm = 140.5395, GNorm = 0.3183, lr_0 = 4.5531e-04
Loss = 9.5972e-03, PNorm = 140.5615, GNorm = 0.3316, lr_0 = 4.5499e-04
Loss = 9.1667e-03, PNorm = 140.5875, GNorm = 0.2865, lr_0 = 4.5468e-04
Loss = 1.1142e-02, PNorm = 140.6111, GNorm = 0.2113, lr_0 = 4.5437e-04
Loss = 1.0377e-02, PNorm = 140.6329, GNorm = 0.2380, lr_0 = 4.5406e-04
Loss = 9.3190e-03, PNorm = 140.6572, GNorm = 0.5067, lr_0 = 4.5375e-04
Loss = 1.0281e-02, PNorm = 140.6780, GNorm = 0.3703, lr_0 = 4.5344e-04
Loss = 1.5604e-02, PNorm = 140.7013, GNorm = 0.1839, lr_0 = 4.5313e-04
Loss = 9.1445e-03, PNorm = 140.7288, GNorm = 0.2721, lr_0 = 4.5282e-04
Loss = 9.5176e-03, PNorm = 140.7543, GNorm = 0.2639, lr_0 = 4.5251e-04
Loss = 1.0732e-02, PNorm = 140.7810, GNorm = 0.1680, lr_0 = 4.5220e-04
Loss = 1.3348e-02, PNorm = 140.8119, GNorm = 0.2900, lr_0 = 4.5189e-04
Loss = 8.7049e-03, PNorm = 140.8414, GNorm = 0.2080, lr_0 = 4.5158e-04
Loss = 8.4149e-03, PNorm = 140.8649, GNorm = 0.3283, lr_0 = 4.5127e-04
Loss = 1.0159e-02, PNorm = 140.8894, GNorm = 0.2661, lr_0 = 4.5096e-04
Loss = 8.5274e-03, PNorm = 140.9122, GNorm = 0.4933, lr_0 = 4.5065e-04
Loss = 1.3676e-02, PNorm = 140.9332, GNorm = 0.3035, lr_0 = 4.5034e-04
Loss = 1.0054e-02, PNorm = 140.9542, GNorm = 0.2159, lr_0 = 4.5003e-04
Loss = 1.0152e-02, PNorm = 140.9764, GNorm = 0.1097, lr_0 = 4.4972e-04
Loss = 1.1618e-02, PNorm = 140.9976, GNorm = 0.3868, lr_0 = 4.4942e-04
Loss = 1.1383e-02, PNorm = 141.0217, GNorm = 0.1647, lr_0 = 4.4911e-04
Loss = 9.6081e-03, PNorm = 141.0510, GNorm = 0.3259, lr_0 = 4.4880e-04
Loss = 1.0055e-02, PNorm = 141.0805, GNorm = 0.2600, lr_0 = 4.4849e-04
Loss = 9.7188e-03, PNorm = 141.1048, GNorm = 0.6233, lr_0 = 4.4819e-04
Loss = 9.3038e-03, PNorm = 141.1289, GNorm = 0.1038, lr_0 = 4.4788e-04
Loss = 1.0047e-02, PNorm = 141.1491, GNorm = 0.0922, lr_0 = 4.4757e-04
Loss = 9.4088e-03, PNorm = 141.1735, GNorm = 0.1763, lr_0 = 4.4727e-04
Loss = 9.1007e-03, PNorm = 141.1985, GNorm = 0.5654, lr_0 = 4.4696e-04
Loss = 1.1394e-02, PNorm = 141.2246, GNorm = 0.1713, lr_0 = 4.4665e-04
Loss = 9.6081e-03, PNorm = 141.2466, GNorm = 0.1455, lr_0 = 4.4635e-04
Loss = 1.0015e-02, PNorm = 141.2689, GNorm = 0.6193, lr_0 = 4.4604e-04
Loss = 9.6412e-03, PNorm = 141.2964, GNorm = 0.1242, lr_0 = 4.4574e-04
Loss = 1.6781e-02, PNorm = 141.3201, GNorm = 0.2530, lr_0 = 4.4543e-04
Loss = 1.1657e-02, PNorm = 141.3478, GNorm = 1.0489, lr_0 = 4.4513e-04
Loss = 1.2721e-02, PNorm = 141.3726, GNorm = 0.3366, lr_0 = 4.4482e-04
Loss = 1.0129e-02, PNorm = 141.3988, GNorm = 0.4223, lr_0 = 4.4452e-04
Loss = 9.4811e-03, PNorm = 141.4243, GNorm = 0.6682, lr_0 = 4.4421e-04
Loss = 1.2351e-02, PNorm = 141.4514, GNorm = 0.3823, lr_0 = 4.4391e-04
Loss = 1.0179e-02, PNorm = 141.4772, GNorm = 0.1792, lr_0 = 4.4360e-04
Loss = 9.5861e-03, PNorm = 141.5077, GNorm = 0.3454, lr_0 = 4.4330e-04
Loss = 9.9981e-03, PNorm = 141.5357, GNorm = 0.1635, lr_0 = 4.4299e-04
Loss = 9.2194e-03, PNorm = 141.5617, GNorm = 0.2124, lr_0 = 4.4269e-04
Loss = 1.2955e-02, PNorm = 141.5887, GNorm = 0.2514, lr_0 = 4.4239e-04
Loss = 8.1478e-03, PNorm = 141.6156, GNorm = 0.2576, lr_0 = 4.4209e-04
Loss = 9.0579e-03, PNorm = 141.6365, GNorm = 0.2454, lr_0 = 4.4178e-04
Loss = 1.0156e-02, PNorm = 141.6568, GNorm = 0.1936, lr_0 = 4.4148e-04
Loss = 1.4153e-02, PNorm = 141.6811, GNorm = 0.1250, lr_0 = 4.4118e-04
Loss = 1.0789e-02, PNorm = 141.7068, GNorm = 0.1483, lr_0 = 4.4088e-04
Loss = 1.0657e-02, PNorm = 141.7311, GNorm = 0.1838, lr_0 = 4.4057e-04
Loss = 8.8753e-03, PNorm = 141.7502, GNorm = 0.1416, lr_0 = 4.4027e-04
Loss = 1.0017e-02, PNorm = 141.7698, GNorm = 0.4605, lr_0 = 4.3997e-04
Loss = 1.0580e-02, PNorm = 141.7921, GNorm = 0.1684, lr_0 = 4.3967e-04
Loss = 1.3903e-02, PNorm = 141.8201, GNorm = 0.4491, lr_0 = 4.3937e-04
Validation mae = 0.478875
Epoch 12
Loss = 9.9441e-03, PNorm = 141.8459, GNorm = 0.4818, lr_0 = 4.3907e-04
Loss = 1.0153e-02, PNorm = 141.8691, GNorm = 0.1162, lr_0 = 4.3877e-04
Loss = 1.1421e-02, PNorm = 141.8867, GNorm = 0.3216, lr_0 = 4.3846e-04
Loss = 1.0090e-02, PNorm = 141.9044, GNorm = 0.3294, lr_0 = 4.3816e-04
Loss = 8.8881e-03, PNorm = 141.9203, GNorm = 0.3253, lr_0 = 4.3786e-04
Loss = 9.3850e-03, PNorm = 141.9388, GNorm = 0.4249, lr_0 = 4.3756e-04
Loss = 9.2570e-03, PNorm = 141.9605, GNorm = 0.4619, lr_0 = 4.3726e-04
Loss = 8.2472e-03, PNorm = 141.9792, GNorm = 0.4481, lr_0 = 4.3696e-04
Loss = 8.4133e-03, PNorm = 141.9960, GNorm = 0.1308, lr_0 = 4.3667e-04
Loss = 7.7924e-03, PNorm = 142.0085, GNorm = 0.1476, lr_0 = 4.3637e-04
Loss = 8.5597e-03, PNorm = 142.0265, GNorm = 0.3140, lr_0 = 4.3607e-04
Loss = 1.0149e-02, PNorm = 142.0418, GNorm = 0.1133, lr_0 = 4.3577e-04
Loss = 9.4439e-03, PNorm = 142.0597, GNorm = 0.2323, lr_0 = 4.3547e-04
Loss = 9.3918e-03, PNorm = 142.0751, GNorm = 0.3734, lr_0 = 4.3517e-04
Loss = 8.4165e-03, PNorm = 142.0926, GNorm = 0.2388, lr_0 = 4.3487e-04
Loss = 7.9486e-03, PNorm = 142.1137, GNorm = 0.1992, lr_0 = 4.3458e-04
Loss = 7.0561e-03, PNorm = 142.1314, GNorm = 0.3572, lr_0 = 4.3428e-04
Loss = 9.5462e-03, PNorm = 142.1491, GNorm = 0.2011, lr_0 = 4.3398e-04
Loss = 9.3962e-03, PNorm = 142.1623, GNorm = 0.2652, lr_0 = 4.3368e-04
Loss = 8.9871e-03, PNorm = 142.1850, GNorm = 0.3504, lr_0 = 4.3339e-04
Loss = 6.9813e-03, PNorm = 142.2063, GNorm = 0.3521, lr_0 = 4.3309e-04
Loss = 8.3763e-03, PNorm = 142.2270, GNorm = 0.3607, lr_0 = 4.3279e-04
Loss = 8.4794e-03, PNorm = 142.2454, GNorm = 0.2327, lr_0 = 4.3250e-04
Loss = 8.6225e-03, PNorm = 142.2635, GNorm = 0.2012, lr_0 = 4.3220e-04
Loss = 7.7995e-03, PNorm = 142.2796, GNorm = 0.3437, lr_0 = 4.3190e-04
Loss = 8.8861e-03, PNorm = 142.2947, GNorm = 0.3127, lr_0 = 4.3161e-04
Loss = 8.2921e-03, PNorm = 142.3135, GNorm = 0.3527, lr_0 = 4.3131e-04
Loss = 7.7479e-03, PNorm = 142.3328, GNorm = 0.2107, lr_0 = 4.3102e-04
Loss = 9.2380e-03, PNorm = 142.3558, GNorm = 0.4087, lr_0 = 4.3072e-04
Loss = 7.5992e-03, PNorm = 142.3739, GNorm = 0.2377, lr_0 = 4.3043e-04
Loss = 8.0099e-03, PNorm = 142.3896, GNorm = 0.3750, lr_0 = 4.3013e-04
Loss = 1.1011e-02, PNorm = 142.4092, GNorm = 0.7585, lr_0 = 4.2984e-04
Loss = 9.3381e-03, PNorm = 142.4251, GNorm = 0.1487, lr_0 = 4.2954e-04
Loss = 7.8243e-03, PNorm = 142.4380, GNorm = 0.3919, lr_0 = 4.2925e-04
Loss = 8.4638e-03, PNorm = 142.4542, GNorm = 0.1106, lr_0 = 4.2895e-04
Loss = 9.6833e-03, PNorm = 142.4720, GNorm = 0.2335, lr_0 = 4.2866e-04
Loss = 9.8281e-03, PNorm = 142.4931, GNorm = 0.2855, lr_0 = 4.2837e-04
Loss = 8.7587e-03, PNorm = 142.5116, GNorm = 0.1659, lr_0 = 4.2807e-04
Loss = 7.5780e-03, PNorm = 142.5264, GNorm = 0.2524, lr_0 = 4.2778e-04
Loss = 9.0348e-03, PNorm = 142.5426, GNorm = 0.2324, lr_0 = 4.2749e-04
Loss = 7.4228e-03, PNorm = 142.5590, GNorm = 0.1287, lr_0 = 4.2719e-04
Loss = 7.8088e-03, PNorm = 142.5765, GNorm = 0.5028, lr_0 = 4.2690e-04
Loss = 1.2951e-02, PNorm = 142.5968, GNorm = 0.1119, lr_0 = 4.2661e-04
Loss = 9.7583e-03, PNorm = 142.6211, GNorm = 0.3414, lr_0 = 4.2632e-04
Loss = 8.7333e-03, PNorm = 142.6448, GNorm = 0.3035, lr_0 = 4.2602e-04
Loss = 9.1148e-03, PNorm = 142.6617, GNorm = 0.2110, lr_0 = 4.2573e-04
Loss = 7.5842e-03, PNorm = 142.6803, GNorm = 0.4929, lr_0 = 4.2544e-04
Loss = 7.1105e-03, PNorm = 142.6961, GNorm = 0.1817, lr_0 = 4.2515e-04
Loss = 9.6953e-03, PNorm = 142.7171, GNorm = 0.2247, lr_0 = 4.2486e-04
Loss = 7.7295e-03, PNorm = 142.7387, GNorm = 0.2562, lr_0 = 4.2457e-04
Loss = 8.0052e-03, PNorm = 142.7576, GNorm = 0.2466, lr_0 = 4.2428e-04
Loss = 7.4407e-03, PNorm = 142.7745, GNorm = 0.1224, lr_0 = 4.2399e-04
Loss = 7.4020e-03, PNorm = 142.7911, GNorm = 0.2094, lr_0 = 4.2370e-04
Loss = 1.0401e-02, PNorm = 142.8086, GNorm = 0.1253, lr_0 = 4.2340e-04
Loss = 8.5904e-03, PNorm = 142.8274, GNorm = 0.1321, lr_0 = 4.2311e-04
Loss = 9.8958e-03, PNorm = 142.8483, GNorm = 0.3082, lr_0 = 4.2283e-04
Loss = 8.9009e-03, PNorm = 142.8650, GNorm = 0.1839, lr_0 = 4.2254e-04
Loss = 9.3504e-03, PNorm = 142.8844, GNorm = 0.2930, lr_0 = 4.2225e-04
Loss = 9.3315e-03, PNorm = 142.9003, GNorm = 0.3548, lr_0 = 4.2196e-04
Loss = 8.5168e-03, PNorm = 142.9196, GNorm = 0.2708, lr_0 = 4.2167e-04
Loss = 7.9146e-03, PNorm = 142.9372, GNorm = 0.2557, lr_0 = 4.2138e-04
Loss = 9.0733e-03, PNorm = 142.9582, GNorm = 0.1366, lr_0 = 4.2109e-04
Loss = 8.3128e-03, PNorm = 142.9765, GNorm = 0.3488, lr_0 = 4.2080e-04
Loss = 9.6742e-03, PNorm = 142.9979, GNorm = 0.1576, lr_0 = 4.2051e-04
Loss = 1.0374e-02, PNorm = 143.0189, GNorm = 0.3775, lr_0 = 4.2023e-04
Loss = 1.0134e-02, PNorm = 143.0376, GNorm = 0.2695, lr_0 = 4.1994e-04
Loss = 9.2412e-03, PNorm = 143.0575, GNorm = 0.2201, lr_0 = 4.1965e-04
Loss = 9.2173e-03, PNorm = 143.0844, GNorm = 0.3355, lr_0 = 4.1936e-04
Loss = 9.4159e-03, PNorm = 143.1085, GNorm = 0.1126, lr_0 = 4.1907e-04
Loss = 6.9305e-03, PNorm = 143.1274, GNorm = 0.2710, lr_0 = 4.1879e-04
Loss = 8.5636e-03, PNorm = 143.1463, GNorm = 0.2056, lr_0 = 4.1850e-04
Loss = 8.4429e-03, PNorm = 143.1667, GNorm = 0.1953, lr_0 = 4.1821e-04
Loss = 7.7020e-03, PNorm = 143.1853, GNorm = 0.0853, lr_0 = 4.1793e-04
Loss = 9.9936e-03, PNorm = 143.2074, GNorm = 0.2455, lr_0 = 4.1764e-04
Loss = 8.2450e-03, PNorm = 143.2310, GNorm = 0.1553, lr_0 = 4.1736e-04
Loss = 8.3172e-03, PNorm = 143.2516, GNorm = 0.2414, lr_0 = 4.1707e-04
Loss = 1.3154e-02, PNorm = 143.2696, GNorm = 0.1540, lr_0 = 4.1678e-04
Loss = 7.7134e-03, PNorm = 143.2870, GNorm = 0.1477, lr_0 = 4.1650e-04
Loss = 9.8898e-03, PNorm = 143.3073, GNorm = 0.1818, lr_0 = 4.1621e-04
Loss = 1.1618e-02, PNorm = 143.3281, GNorm = 0.1259, lr_0 = 4.1593e-04
Loss = 9.7476e-03, PNorm = 143.3527, GNorm = 0.1786, lr_0 = 4.1564e-04
Loss = 1.2019e-02, PNorm = 143.3767, GNorm = 0.2921, lr_0 = 4.1536e-04
Loss = 7.9133e-03, PNorm = 143.4010, GNorm = 0.1899, lr_0 = 4.1507e-04
Loss = 7.2296e-03, PNorm = 143.4220, GNorm = 0.3284, lr_0 = 4.1479e-04
Loss = 8.0487e-03, PNorm = 143.4423, GNorm = 0.1917, lr_0 = 4.1450e-04
Loss = 8.4202e-03, PNorm = 143.4653, GNorm = 0.1580, lr_0 = 4.1422e-04
Loss = 8.8671e-03, PNorm = 143.4837, GNorm = 0.5026, lr_0 = 4.1394e-04
Loss = 8.7148e-03, PNorm = 143.5085, GNorm = 0.2192, lr_0 = 4.1365e-04
Loss = 1.2634e-02, PNorm = 143.5318, GNorm = 0.2767, lr_0 = 4.1337e-04
Loss = 8.1750e-03, PNorm = 143.5487, GNorm = 0.1917, lr_0 = 4.1309e-04
Loss = 8.4645e-03, PNorm = 143.5741, GNorm = 0.3718, lr_0 = 4.1280e-04
Loss = 7.4175e-03, PNorm = 143.5974, GNorm = 0.2832, lr_0 = 4.1252e-04
Loss = 8.2749e-03, PNorm = 143.6190, GNorm = 0.4154, lr_0 = 4.1224e-04
Loss = 8.7553e-03, PNorm = 143.6411, GNorm = 0.2562, lr_0 = 4.1196e-04
Loss = 8.1972e-03, PNorm = 143.6590, GNorm = 0.1501, lr_0 = 4.1167e-04
Loss = 9.1743e-03, PNorm = 143.6788, GNorm = 0.3047, lr_0 = 4.1139e-04
Loss = 8.2234e-03, PNorm = 143.7043, GNorm = 0.1100, lr_0 = 4.1111e-04
Loss = 7.6324e-03, PNorm = 143.7252, GNorm = 0.2223, lr_0 = 4.1083e-04
Loss = 8.7642e-03, PNorm = 143.7446, GNorm = 0.1972, lr_0 = 4.1055e-04
Loss = 8.5495e-03, PNorm = 143.7678, GNorm = 0.1617, lr_0 = 4.1027e-04
Loss = 7.2971e-03, PNorm = 143.7887, GNorm = 0.1829, lr_0 = 4.0998e-04
Loss = 7.1696e-03, PNorm = 143.8112, GNorm = 0.2228, lr_0 = 4.0970e-04
Loss = 7.8621e-03, PNorm = 143.8271, GNorm = 0.1096, lr_0 = 4.0942e-04
Loss = 7.0776e-03, PNorm = 143.8428, GNorm = 0.1021, lr_0 = 4.0914e-04
Loss = 8.8879e-03, PNorm = 143.8593, GNorm = 0.4526, lr_0 = 4.0886e-04
Loss = 7.3510e-03, PNorm = 143.8798, GNorm = 0.1272, lr_0 = 4.0858e-04
Loss = 9.8686e-03, PNorm = 143.9023, GNorm = 0.2804, lr_0 = 4.0830e-04
Loss = 8.0883e-03, PNorm = 143.9221, GNorm = 0.4235, lr_0 = 4.0802e-04
Loss = 8.1535e-03, PNorm = 143.9409, GNorm = 0.1943, lr_0 = 4.0774e-04
Loss = 1.0612e-02, PNorm = 143.9647, GNorm = 0.3375, lr_0 = 4.0746e-04
Loss = 9.9064e-03, PNorm = 143.9898, GNorm = 0.0845, lr_0 = 4.0718e-04
Loss = 9.2278e-03, PNorm = 144.0112, GNorm = 0.2307, lr_0 = 4.0691e-04
Loss = 7.0577e-03, PNorm = 144.0307, GNorm = 0.2812, lr_0 = 4.0663e-04
Loss = 9.5744e-03, PNorm = 144.0498, GNorm = 0.3438, lr_0 = 4.0635e-04
Loss = 7.6230e-03, PNorm = 144.0681, GNorm = 0.3861, lr_0 = 4.0607e-04
Loss = 9.2859e-03, PNorm = 144.0878, GNorm = 0.5060, lr_0 = 4.0579e-04
Loss = 1.0388e-02, PNorm = 144.1096, GNorm = 0.4540, lr_0 = 4.0551e-04
Loss = 1.1389e-02, PNorm = 144.1310, GNorm = 0.6200, lr_0 = 4.0524e-04
Loss = 1.0009e-02, PNorm = 144.1554, GNorm = 0.2705, lr_0 = 4.0496e-04
Loss = 7.4810e-03, PNorm = 144.1792, GNorm = 0.2259, lr_0 = 4.0468e-04
Validation mae = 0.480332
Epoch 13
Loss = 8.2859e-03, PNorm = 144.1935, GNorm = 0.3501, lr_0 = 4.0440e-04
Loss = 7.2654e-03, PNorm = 144.2085, GNorm = 0.2124, lr_0 = 4.0413e-04
Loss = 7.2520e-03, PNorm = 144.2215, GNorm = 0.3658, lr_0 = 4.0385e-04
Loss = 6.9533e-03, PNorm = 144.2364, GNorm = 0.1964, lr_0 = 4.0357e-04
Loss = 7.4520e-03, PNorm = 144.2541, GNorm = 0.2934, lr_0 = 4.0330e-04
Loss = 6.0826e-03, PNorm = 144.2697, GNorm = 0.3082, lr_0 = 4.0302e-04
Loss = 7.3970e-03, PNorm = 144.2783, GNorm = 0.2505, lr_0 = 4.0274e-04
Loss = 8.0486e-03, PNorm = 144.2892, GNorm = 0.3249, lr_0 = 4.0247e-04
Loss = 7.0342e-03, PNorm = 144.3030, GNorm = 0.1571, lr_0 = 4.0219e-04
Loss = 6.5916e-03, PNorm = 144.3203, GNorm = 0.0789, lr_0 = 4.0192e-04
Loss = 7.4729e-03, PNorm = 144.3363, GNorm = 0.2659, lr_0 = 4.0164e-04
Loss = 7.5580e-03, PNorm = 144.3473, GNorm = 0.1010, lr_0 = 4.0137e-04
Loss = 6.6360e-03, PNorm = 144.3603, GNorm = 0.3053, lr_0 = 4.0109e-04
Loss = 6.9646e-03, PNorm = 144.3771, GNorm = 0.2153, lr_0 = 4.0082e-04
Loss = 6.8294e-03, PNorm = 144.3896, GNorm = 0.0778, lr_0 = 4.0054e-04
Loss = 7.0942e-03, PNorm = 144.4033, GNorm = 0.2924, lr_0 = 4.0027e-04
Loss = 7.1483e-03, PNorm = 144.4225, GNorm = 0.1725, lr_0 = 3.9999e-04
Loss = 5.7056e-03, PNorm = 144.4411, GNorm = 0.2293, lr_0 = 3.9972e-04
Loss = 8.5423e-03, PNorm = 144.4561, GNorm = 0.3307, lr_0 = 3.9945e-04
Loss = 6.3539e-03, PNorm = 144.4736, GNorm = 0.0771, lr_0 = 3.9917e-04
Loss = 7.7588e-03, PNorm = 144.4884, GNorm = 0.2926, lr_0 = 3.9890e-04
Loss = 6.8590e-03, PNorm = 144.4998, GNorm = 0.0755, lr_0 = 3.9863e-04
Loss = 8.7194e-03, PNorm = 144.5129, GNorm = 0.2113, lr_0 = 3.9835e-04
Loss = 8.0184e-03, PNorm = 144.5318, GNorm = 0.1378, lr_0 = 3.9808e-04
Loss = 7.6442e-03, PNorm = 144.5502, GNorm = 0.6086, lr_0 = 3.9781e-04
Loss = 6.7865e-03, PNorm = 144.5659, GNorm = 0.2395, lr_0 = 3.9753e-04
Loss = 6.0232e-03, PNorm = 144.5828, GNorm = 0.3266, lr_0 = 3.9726e-04
Loss = 7.0804e-03, PNorm = 144.6010, GNorm = 0.1078, lr_0 = 3.9699e-04
Loss = 7.7060e-03, PNorm = 144.6131, GNorm = 0.2134, lr_0 = 3.9672e-04
Loss = 6.6180e-03, PNorm = 144.6287, GNorm = 0.2088, lr_0 = 3.9645e-04
Loss = 5.6646e-03, PNorm = 144.6402, GNorm = 0.1968, lr_0 = 3.9617e-04
Loss = 6.8852e-03, PNorm = 144.6514, GNorm = 0.3063, lr_0 = 3.9590e-04
Loss = 6.1919e-03, PNorm = 144.6646, GNorm = 0.0622, lr_0 = 3.9563e-04
Loss = 5.9151e-03, PNorm = 144.6792, GNorm = 0.3220, lr_0 = 3.9536e-04
Loss = 7.2689e-03, PNorm = 144.6938, GNorm = 0.2432, lr_0 = 3.9509e-04
Loss = 5.8206e-03, PNorm = 144.7104, GNorm = 0.1916, lr_0 = 3.9482e-04
Loss = 7.6976e-03, PNorm = 144.7237, GNorm = 0.2220, lr_0 = 3.9455e-04
Loss = 7.9608e-03, PNorm = 144.7370, GNorm = 0.4346, lr_0 = 3.9428e-04
Loss = 7.3794e-03, PNorm = 144.7523, GNorm = 0.0812, lr_0 = 3.9401e-04
Loss = 6.7447e-03, PNorm = 144.7687, GNorm = 0.2112, lr_0 = 3.9374e-04
Loss = 6.1148e-03, PNorm = 144.7860, GNorm = 0.3698, lr_0 = 3.9347e-04
Loss = 7.8159e-03, PNorm = 144.7994, GNorm = 0.1614, lr_0 = 3.9320e-04
Loss = 9.8112e-03, PNorm = 144.8159, GNorm = 0.4816, lr_0 = 3.9293e-04
Loss = 7.0603e-03, PNorm = 144.8305, GNorm = 0.3697, lr_0 = 3.9266e-04
Loss = 8.0369e-03, PNorm = 144.8451, GNorm = 0.3227, lr_0 = 3.9239e-04
Loss = 1.0185e-02, PNorm = 144.8580, GNorm = 0.2846, lr_0 = 3.9212e-04
Loss = 5.8908e-03, PNorm = 144.8763, GNorm = 0.1980, lr_0 = 3.9185e-04
Loss = 6.7629e-03, PNorm = 144.8899, GNorm = 0.3079, lr_0 = 3.9159e-04
Loss = 7.6810e-03, PNorm = 144.9057, GNorm = 0.1465, lr_0 = 3.9132e-04
Loss = 7.5094e-03, PNorm = 144.9222, GNorm = 0.3249, lr_0 = 3.9105e-04
Loss = 6.0683e-03, PNorm = 144.9403, GNorm = 0.1028, lr_0 = 3.9078e-04
Loss = 8.0684e-03, PNorm = 144.9549, GNorm = 0.2436, lr_0 = 3.9051e-04
Loss = 6.6221e-03, PNorm = 144.9705, GNorm = 0.2454, lr_0 = 3.9025e-04
Loss = 8.2363e-03, PNorm = 144.9835, GNorm = 0.1821, lr_0 = 3.8998e-04
Loss = 8.0693e-03, PNorm = 144.9966, GNorm = 0.2735, lr_0 = 3.8971e-04
Loss = 7.1903e-03, PNorm = 145.0157, GNorm = 0.7018, lr_0 = 3.8945e-04
Loss = 9.1084e-03, PNorm = 145.0340, GNorm = 0.2556, lr_0 = 3.8918e-04
Loss = 1.0014e-02, PNorm = 145.0503, GNorm = 0.6780, lr_0 = 3.8891e-04
Loss = 7.7002e-03, PNorm = 145.0740, GNorm = 0.2866, lr_0 = 3.8865e-04
Loss = 9.7070e-03, PNorm = 145.0954, GNorm = 0.9951, lr_0 = 3.8838e-04
Loss = 6.6188e-03, PNorm = 145.1142, GNorm = 0.1258, lr_0 = 3.8811e-04
Loss = 6.6541e-03, PNorm = 145.1315, GNorm = 0.2456, lr_0 = 3.8785e-04
Loss = 7.7354e-03, PNorm = 145.1500, GNorm = 0.1197, lr_0 = 3.8758e-04
Loss = 7.9556e-03, PNorm = 145.1662, GNorm = 0.3128, lr_0 = 3.8732e-04
Loss = 6.6115e-03, PNorm = 145.1825, GNorm = 0.1119, lr_0 = 3.8705e-04
Loss = 7.1476e-03, PNorm = 145.1990, GNorm = 0.1659, lr_0 = 3.8679e-04
Loss = 8.0163e-03, PNorm = 145.2159, GNorm = 0.4007, lr_0 = 3.8652e-04
Loss = 7.7780e-03, PNorm = 145.2315, GNorm = 0.2296, lr_0 = 3.8626e-04
Loss = 6.8539e-03, PNorm = 145.2468, GNorm = 0.0933, lr_0 = 3.8599e-04
Loss = 8.3941e-03, PNorm = 145.2633, GNorm = 0.7021, lr_0 = 3.8573e-04
Loss = 7.4166e-03, PNorm = 145.2816, GNorm = 0.2673, lr_0 = 3.8546e-04
Loss = 8.8082e-03, PNorm = 145.3021, GNorm = 0.1783, lr_0 = 3.8520e-04
Loss = 7.8075e-03, PNorm = 145.3213, GNorm = 0.4316, lr_0 = 3.8493e-04
Loss = 6.4146e-03, PNorm = 145.3392, GNorm = 0.2608, lr_0 = 3.8467e-04
Loss = 7.1110e-03, PNorm = 145.3535, GNorm = 0.2458, lr_0 = 3.8441e-04
Loss = 7.8707e-03, PNorm = 145.3701, GNorm = 0.2888, lr_0 = 3.8414e-04
Loss = 5.9056e-03, PNorm = 145.3847, GNorm = 0.2854, lr_0 = 3.8388e-04
Loss = 9.5383e-03, PNorm = 145.3994, GNorm = 0.4397, lr_0 = 3.8362e-04
Loss = 7.2382e-03, PNorm = 145.4153, GNorm = 0.3000, lr_0 = 3.8336e-04
Loss = 6.9553e-03, PNorm = 145.4341, GNorm = 0.2109, lr_0 = 3.8309e-04
Loss = 7.2400e-03, PNorm = 145.4530, GNorm = 0.4088, lr_0 = 3.8283e-04
Loss = 6.6837e-03, PNorm = 145.4731, GNorm = 0.2292, lr_0 = 3.8257e-04
Loss = 5.8782e-03, PNorm = 145.4875, GNorm = 0.2648, lr_0 = 3.8231e-04
Loss = 7.4551e-03, PNorm = 145.5081, GNorm = 0.3401, lr_0 = 3.8204e-04
Loss = 1.0391e-02, PNorm = 145.5184, GNorm = 0.5023, lr_0 = 3.8178e-04
Loss = 6.7771e-03, PNorm = 145.5360, GNorm = 0.3502, lr_0 = 3.8152e-04
Loss = 5.3612e-03, PNorm = 145.5532, GNorm = 0.0850, lr_0 = 3.8126e-04
Loss = 7.5633e-03, PNorm = 145.5672, GNorm = 0.1579, lr_0 = 3.8100e-04
Loss = 7.8671e-03, PNorm = 145.5818, GNorm = 0.2168, lr_0 = 3.8074e-04
Loss = 7.3635e-03, PNorm = 145.6010, GNorm = 0.1848, lr_0 = 3.8048e-04
Loss = 1.0611e-02, PNorm = 145.6187, GNorm = 0.1196, lr_0 = 3.8022e-04
Loss = 7.8768e-03, PNorm = 145.6307, GNorm = 0.1118, lr_0 = 3.7995e-04
Loss = 7.6728e-03, PNorm = 145.6459, GNorm = 0.2039, lr_0 = 3.7969e-04
Loss = 7.7408e-03, PNorm = 145.6591, GNorm = 0.2817, lr_0 = 3.7943e-04
Loss = 7.6537e-03, PNorm = 145.6778, GNorm = 0.2248, lr_0 = 3.7917e-04
Loss = 7.8647e-03, PNorm = 145.6974, GNorm = 0.2021, lr_0 = 3.7891e-04
Loss = 7.1764e-03, PNorm = 145.7133, GNorm = 0.3006, lr_0 = 3.7866e-04
Loss = 6.9473e-03, PNorm = 145.7323, GNorm = 0.1883, lr_0 = 3.7840e-04
Loss = 6.8980e-03, PNorm = 145.7450, GNorm = 0.2710, lr_0 = 3.7814e-04
Loss = 7.9262e-03, PNorm = 145.7620, GNorm = 0.2527, lr_0 = 3.7788e-04
Loss = 6.2018e-03, PNorm = 145.7791, GNorm = 0.3997, lr_0 = 3.7762e-04
Loss = 8.2920e-03, PNorm = 145.7921, GNorm = 0.4531, lr_0 = 3.7736e-04
Loss = 7.7717e-03, PNorm = 145.8117, GNorm = 0.1790, lr_0 = 3.7710e-04
Loss = 7.3796e-03, PNorm = 145.8302, GNorm = 0.1135, lr_0 = 3.7684e-04
Loss = 9.8198e-03, PNorm = 145.8498, GNorm = 0.1332, lr_0 = 3.7659e-04
Loss = 6.4630e-03, PNorm = 145.8702, GNorm = 0.1967, lr_0 = 3.7633e-04
Loss = 9.2287e-03, PNorm = 145.8871, GNorm = 0.1486, lr_0 = 3.7607e-04
Loss = 9.5279e-03, PNorm = 145.9011, GNorm = 0.2771, lr_0 = 3.7581e-04
Loss = 6.4157e-03, PNorm = 145.9208, GNorm = 0.1611, lr_0 = 3.7555e-04
Loss = 7.4730e-03, PNorm = 145.9407, GNorm = 0.1625, lr_0 = 3.7530e-04
Loss = 6.6259e-03, PNorm = 145.9603, GNorm = 0.3936, lr_0 = 3.7504e-04
Loss = 6.4803e-03, PNorm = 145.9734, GNorm = 0.1357, lr_0 = 3.7478e-04
Loss = 7.9621e-03, PNorm = 145.9928, GNorm = 0.1976, lr_0 = 3.7453e-04
Loss = 8.4568e-03, PNorm = 146.0162, GNorm = 0.0996, lr_0 = 3.7427e-04
Loss = 8.4990e-03, PNorm = 146.0362, GNorm = 0.2062, lr_0 = 3.7401e-04
Loss = 6.9657e-03, PNorm = 146.0540, GNorm = 0.1696, lr_0 = 3.7376e-04
Loss = 9.1687e-03, PNorm = 146.0685, GNorm = 0.1586, lr_0 = 3.7350e-04
Loss = 6.2745e-03, PNorm = 146.0900, GNorm = 0.4262, lr_0 = 3.7325e-04
Loss = 7.3355e-03, PNorm = 146.1055, GNorm = 0.0963, lr_0 = 3.7299e-04
Loss = 6.5874e-03, PNorm = 146.1255, GNorm = 0.2955, lr_0 = 3.7273e-04
Validation mae = 0.479138
Epoch 14
Loss = 6.6828e-03, PNorm = 146.1421, GNorm = 0.3724, lr_0 = 3.7248e-04
Loss = 5.3721e-03, PNorm = 146.1539, GNorm = 0.0693, lr_0 = 3.7222e-04
Loss = 7.5754e-03, PNorm = 146.1622, GNorm = 0.1393, lr_0 = 3.7197e-04
Loss = 5.9721e-03, PNorm = 146.1755, GNorm = 0.3165, lr_0 = 3.7171e-04
Loss = 6.1783e-03, PNorm = 146.1875, GNorm = 0.2564, lr_0 = 3.7146e-04
Loss = 6.0798e-03, PNorm = 146.2004, GNorm = 0.2302, lr_0 = 3.7120e-04
Loss = 8.0306e-03, PNorm = 146.2134, GNorm = 0.4214, lr_0 = 3.7095e-04
Loss = 6.3744e-03, PNorm = 146.2253, GNorm = 0.2190, lr_0 = 3.7070e-04
Loss = 5.5234e-03, PNorm = 146.2361, GNorm = 0.1836, lr_0 = 3.7044e-04
Loss = 8.1508e-03, PNorm = 146.2502, GNorm = 0.6676, lr_0 = 3.7019e-04
Loss = 8.4589e-03, PNorm = 146.2593, GNorm = 0.7763, lr_0 = 3.6993e-04
Loss = 6.7744e-03, PNorm = 146.2803, GNorm = 0.2870, lr_0 = 3.6968e-04
Loss = 6.0741e-03, PNorm = 146.2946, GNorm = 0.2639, lr_0 = 3.6943e-04
Loss = 8.6478e-03, PNorm = 146.3091, GNorm = 0.0902, lr_0 = 3.6917e-04
Loss = 7.4288e-03, PNorm = 146.3197, GNorm = 0.4244, lr_0 = 3.6892e-04
Loss = 6.0986e-03, PNorm = 146.3295, GNorm = 0.2992, lr_0 = 3.6867e-04
Loss = 6.4313e-03, PNorm = 146.3400, GNorm = 0.2760, lr_0 = 3.6842e-04
Loss = 7.5044e-03, PNorm = 146.3554, GNorm = 0.1701, lr_0 = 3.6816e-04
Loss = 6.3336e-03, PNorm = 146.3678, GNorm = 0.4285, lr_0 = 3.6791e-04
Loss = 7.0697e-03, PNorm = 146.3809, GNorm = 0.6450, lr_0 = 3.6766e-04
Loss = 4.6364e-03, PNorm = 146.3960, GNorm = 0.3756, lr_0 = 3.6741e-04
Loss = 6.1965e-03, PNorm = 146.4092, GNorm = 0.1584, lr_0 = 3.6716e-04
Loss = 5.6128e-03, PNorm = 146.4199, GNorm = 0.1553, lr_0 = 3.6690e-04
Loss = 5.5450e-03, PNorm = 146.4322, GNorm = 0.2946, lr_0 = 3.6665e-04
Loss = 7.0306e-03, PNorm = 146.4419, GNorm = 0.4122, lr_0 = 3.6640e-04
Loss = 5.8557e-03, PNorm = 146.4500, GNorm = 0.3854, lr_0 = 3.6615e-04
Loss = 6.3116e-03, PNorm = 146.4645, GNorm = 0.3995, lr_0 = 3.6590e-04
Loss = 5.6835e-03, PNorm = 146.4779, GNorm = 0.2261, lr_0 = 3.6565e-04
Loss = 6.2135e-03, PNorm = 146.4928, GNorm = 0.4245, lr_0 = 3.6540e-04
Loss = 5.5171e-03, PNorm = 146.5063, GNorm = 0.3204, lr_0 = 3.6515e-04
Loss = 5.9524e-03, PNorm = 146.5184, GNorm = 0.0927, lr_0 = 3.6490e-04
Loss = 5.9407e-03, PNorm = 146.5332, GNorm = 0.3546, lr_0 = 3.6465e-04
Loss = 7.5152e-03, PNorm = 146.5461, GNorm = 0.2150, lr_0 = 3.6440e-04
Loss = 7.1125e-03, PNorm = 146.5612, GNorm = 0.2881, lr_0 = 3.6415e-04
Loss = 6.5335e-03, PNorm = 146.5742, GNorm = 0.3721, lr_0 = 3.6390e-04
Loss = 6.3819e-03, PNorm = 146.5849, GNorm = 0.2473, lr_0 = 3.6365e-04
Loss = 5.2422e-03, PNorm = 146.5997, GNorm = 0.2876, lr_0 = 3.6340e-04
Loss = 6.0247e-03, PNorm = 146.6166, GNorm = 0.1965, lr_0 = 3.6315e-04
Loss = 8.0453e-03, PNorm = 146.6283, GNorm = 0.2468, lr_0 = 3.6290e-04
Loss = 6.8241e-03, PNorm = 146.6408, GNorm = 0.2686, lr_0 = 3.6266e-04
Loss = 6.6977e-03, PNorm = 146.6554, GNorm = 0.3086, lr_0 = 3.6241e-04
Loss = 6.2112e-03, PNorm = 146.6666, GNorm = 0.0740, lr_0 = 3.6216e-04
Loss = 5.0431e-03, PNorm = 146.6841, GNorm = 0.1880, lr_0 = 3.6191e-04
Loss = 5.3725e-03, PNorm = 146.6997, GNorm = 0.2910, lr_0 = 3.6166e-04
Loss = 5.8681e-03, PNorm = 146.7094, GNorm = 0.1351, lr_0 = 3.6141e-04
Loss = 6.7007e-03, PNorm = 146.7189, GNorm = 0.4286, lr_0 = 3.6117e-04
Loss = 5.9500e-03, PNorm = 146.7318, GNorm = 0.3854, lr_0 = 3.6092e-04
Loss = 6.3962e-03, PNorm = 146.7434, GNorm = 0.0677, lr_0 = 3.6067e-04
Loss = 6.9695e-03, PNorm = 146.7546, GNorm = 0.2718, lr_0 = 3.6043e-04
Loss = 7.7257e-03, PNorm = 146.7688, GNorm = 0.2628, lr_0 = 3.6018e-04
Loss = 6.3351e-03, PNorm = 146.7860, GNorm = 0.2024, lr_0 = 3.5993e-04
Loss = 5.0682e-03, PNorm = 146.7988, GNorm = 0.2325, lr_0 = 3.5969e-04
Loss = 6.8972e-03, PNorm = 146.8124, GNorm = 0.2274, lr_0 = 3.5944e-04
Loss = 5.0173e-03, PNorm = 146.8291, GNorm = 0.1658, lr_0 = 3.5919e-04
Loss = 8.1484e-03, PNorm = 146.8420, GNorm = 0.2944, lr_0 = 3.5895e-04
Loss = 5.1179e-03, PNorm = 146.8539, GNorm = 0.3735, lr_0 = 3.5870e-04
Loss = 5.4285e-03, PNorm = 146.8636, GNorm = 0.2004, lr_0 = 3.5845e-04
Loss = 7.1910e-03, PNorm = 146.8722, GNorm = 0.3567, lr_0 = 3.5821e-04
Loss = 5.9110e-03, PNorm = 146.8871, GNorm = 0.0941, lr_0 = 3.5796e-04
Loss = 5.8965e-03, PNorm = 146.9058, GNorm = 0.1188, lr_0 = 3.5772e-04
Loss = 5.3419e-03, PNorm = 146.9201, GNorm = 0.1827, lr_0 = 3.5747e-04
Loss = 6.3333e-03, PNorm = 146.9335, GNorm = 0.1511, lr_0 = 3.5723e-04
Loss = 6.5081e-03, PNorm = 146.9497, GNorm = 0.4694, lr_0 = 3.5698e-04
Loss = 6.6495e-03, PNorm = 146.9613, GNorm = 0.0715, lr_0 = 3.5674e-04
Loss = 4.8512e-03, PNorm = 146.9728, GNorm = 0.3850, lr_0 = 3.5650e-04
Loss = 8.0925e-03, PNorm = 146.9821, GNorm = 0.2490, lr_0 = 3.5625e-04
Loss = 7.0162e-03, PNorm = 147.0000, GNorm = 0.2174, lr_0 = 3.5601e-04
Loss = 6.7919e-03, PNorm = 147.0186, GNorm = 0.3289, lr_0 = 3.5576e-04
Loss = 5.6722e-03, PNorm = 147.0352, GNorm = 0.2785, lr_0 = 3.5552e-04
Loss = 5.6693e-03, PNorm = 147.0487, GNorm = 0.6270, lr_0 = 3.5528e-04
Loss = 7.4014e-03, PNorm = 147.0620, GNorm = 0.1064, lr_0 = 3.5503e-04
Loss = 5.5003e-03, PNorm = 147.0771, GNorm = 0.1015, lr_0 = 3.5479e-04
Loss = 5.0227e-03, PNorm = 147.0872, GNorm = 0.3327, lr_0 = 3.5455e-04
Loss = 6.1771e-03, PNorm = 147.1020, GNorm = 0.2927, lr_0 = 3.5430e-04
Loss = 6.0439e-03, PNorm = 147.1158, GNorm = 0.0846, lr_0 = 3.5406e-04
Loss = 5.2487e-03, PNorm = 147.1308, GNorm = 0.3423, lr_0 = 3.5382e-04
Loss = 5.3055e-03, PNorm = 147.1431, GNorm = 0.1446, lr_0 = 3.5358e-04
Loss = 6.9524e-03, PNorm = 147.1569, GNorm = 0.2372, lr_0 = 3.5333e-04
Loss = 8.1925e-03, PNorm = 147.1709, GNorm = 0.3983, lr_0 = 3.5309e-04
Loss = 7.2323e-03, PNorm = 147.1850, GNorm = 0.2718, lr_0 = 3.5285e-04
Loss = 6.0799e-03, PNorm = 147.2015, GNorm = 0.0904, lr_0 = 3.5261e-04
Loss = 5.5191e-03, PNorm = 147.2162, GNorm = 0.2185, lr_0 = 3.5237e-04
Loss = 4.9629e-03, PNorm = 147.2326, GNorm = 0.0963, lr_0 = 3.5212e-04
Loss = 5.2654e-03, PNorm = 147.2457, GNorm = 0.2172, lr_0 = 3.5188e-04
Loss = 5.6514e-03, PNorm = 147.2606, GNorm = 0.1642, lr_0 = 3.5164e-04
Loss = 5.5334e-03, PNorm = 147.2803, GNorm = 0.3004, lr_0 = 3.5140e-04
Loss = 5.4334e-03, PNorm = 147.2898, GNorm = 0.1474, lr_0 = 3.5116e-04
Loss = 6.2954e-03, PNorm = 147.2984, GNorm = 0.2132, lr_0 = 3.5092e-04
Loss = 5.4037e-03, PNorm = 147.3100, GNorm = 0.2354, lr_0 = 3.5068e-04
Loss = 6.4860e-03, PNorm = 147.3237, GNorm = 0.3724, lr_0 = 3.5044e-04
Loss = 9.1513e-03, PNorm = 147.3374, GNorm = 0.2065, lr_0 = 3.5020e-04
Loss = 5.3704e-03, PNorm = 147.3498, GNorm = 0.5664, lr_0 = 3.4996e-04
Loss = 5.4337e-03, PNorm = 147.3630, GNorm = 0.0956, lr_0 = 3.4972e-04
Loss = 7.6101e-03, PNorm = 147.3784, GNorm = 0.0871, lr_0 = 3.4948e-04
Loss = 8.5203e-03, PNorm = 147.3936, GNorm = 0.3381, lr_0 = 3.4924e-04
Loss = 6.3159e-03, PNorm = 147.4069, GNorm = 0.2462, lr_0 = 3.4900e-04
Loss = 6.7440e-03, PNorm = 147.4184, GNorm = 0.3683, lr_0 = 3.4876e-04
Loss = 5.4078e-03, PNorm = 147.4307, GNorm = 0.2056, lr_0 = 3.4852e-04
Loss = 4.4663e-03, PNorm = 147.4408, GNorm = 0.1611, lr_0 = 3.4828e-04
Loss = 5.0168e-03, PNorm = 147.4518, GNorm = 0.2050, lr_0 = 3.4805e-04
Loss = 6.2069e-03, PNorm = 147.4662, GNorm = 0.2059, lr_0 = 3.4781e-04
Loss = 5.6479e-03, PNorm = 147.4804, GNorm = 0.1316, lr_0 = 3.4757e-04
Loss = 6.8536e-03, PNorm = 147.4942, GNorm = 0.1308, lr_0 = 3.4733e-04
Loss = 5.3922e-03, PNorm = 147.5092, GNorm = 0.1172, lr_0 = 3.4709e-04
Loss = 5.0258e-03, PNorm = 147.5229, GNorm = 0.2084, lr_0 = 3.4686e-04
Loss = 6.4873e-03, PNorm = 147.5336, GNorm = 0.3066, lr_0 = 3.4662e-04
Loss = 4.8992e-03, PNorm = 147.5464, GNorm = 0.2035, lr_0 = 3.4638e-04
Loss = 4.9683e-03, PNorm = 147.5590, GNorm = 0.1867, lr_0 = 3.4614e-04
Loss = 6.2156e-03, PNorm = 147.5706, GNorm = 0.3927, lr_0 = 3.4591e-04
Loss = 9.1925e-03, PNorm = 147.5861, GNorm = 0.3601, lr_0 = 3.4567e-04
Loss = 6.0196e-03, PNorm = 147.6052, GNorm = 0.1185, lr_0 = 3.4543e-04
Loss = 5.5730e-03, PNorm = 147.6196, GNorm = 0.2825, lr_0 = 3.4520e-04
Loss = 5.3705e-03, PNorm = 147.6338, GNorm = 0.0937, lr_0 = 3.4496e-04
Loss = 5.0843e-03, PNorm = 147.6485, GNorm = 0.3229, lr_0 = 3.4472e-04
Loss = 5.3031e-03, PNorm = 147.6631, GNorm = 0.0716, lr_0 = 3.4449e-04
Loss = 7.9102e-03, PNorm = 147.6763, GNorm = 0.2743, lr_0 = 3.4425e-04
Loss = 7.2949e-03, PNorm = 147.6869, GNorm = 0.3416, lr_0 = 3.4402e-04
Loss = 5.5176e-03, PNorm = 147.7027, GNorm = 0.3117, lr_0 = 3.4378e-04
Loss = 5.1951e-03, PNorm = 147.7177, GNorm = 0.1498, lr_0 = 3.4354e-04
Loss = 7.2044e-03, PNorm = 147.7333, GNorm = 0.2750, lr_0 = 3.4331e-04
Validation mae = 0.478162
Epoch 15
Loss = 4.9649e-03, PNorm = 147.7439, GNorm = 0.1126, lr_0 = 3.4307e-04
Loss = 5.5027e-03, PNorm = 147.7518, GNorm = 0.2782, lr_0 = 3.4284e-04
Loss = 5.8147e-03, PNorm = 147.7646, GNorm = 0.1469, lr_0 = 3.4260e-04
Loss = 4.8788e-03, PNorm = 147.7787, GNorm = 0.0868, lr_0 = 3.4237e-04
Loss = 4.6739e-03, PNorm = 147.7923, GNorm = 0.2075, lr_0 = 3.4213e-04
Loss = 4.7520e-03, PNorm = 147.8030, GNorm = 0.0926, lr_0 = 3.4190e-04
Loss = 4.2626e-03, PNorm = 147.8121, GNorm = 0.2032, lr_0 = 3.4167e-04
Loss = 5.6414e-03, PNorm = 147.8186, GNorm = 0.3325, lr_0 = 3.4143e-04
Loss = 5.1144e-03, PNorm = 147.8267, GNorm = 0.1033, lr_0 = 3.4120e-04
Loss = 4.4120e-03, PNorm = 147.8388, GNorm = 0.3956, lr_0 = 3.4096e-04
Loss = 5.3751e-03, PNorm = 147.8495, GNorm = 0.1922, lr_0 = 3.4073e-04
Loss = 5.1587e-03, PNorm = 147.8600, GNorm = 0.2438, lr_0 = 3.4050e-04
Loss = 5.4457e-03, PNorm = 147.8735, GNorm = 0.3483, lr_0 = 3.4026e-04
Loss = 5.4901e-03, PNorm = 147.8832, GNorm = 0.2105, lr_0 = 3.4003e-04
Loss = 5.8428e-03, PNorm = 147.8937, GNorm = 0.3205, lr_0 = 3.3980e-04
Loss = 5.4918e-03, PNorm = 147.9069, GNorm = 0.0677, lr_0 = 3.3956e-04
Loss = 5.3104e-03, PNorm = 147.9149, GNorm = 0.6270, lr_0 = 3.3933e-04
Loss = 4.9701e-03, PNorm = 147.9249, GNorm = 0.2320, lr_0 = 3.3910e-04
Loss = 6.4313e-03, PNorm = 147.9351, GNorm = 0.2634, lr_0 = 3.3887e-04
Loss = 4.4861e-03, PNorm = 147.9465, GNorm = 0.1031, lr_0 = 3.3864e-04
Loss = 4.4366e-03, PNorm = 147.9577, GNorm = 0.2535, lr_0 = 3.3840e-04
Loss = 5.5589e-03, PNorm = 147.9688, GNorm = 0.1052, lr_0 = 3.3817e-04
Loss = 3.9144e-03, PNorm = 147.9773, GNorm = 0.1554, lr_0 = 3.3794e-04
Loss = 4.9923e-03, PNorm = 147.9873, GNorm = 0.1336, lr_0 = 3.3771e-04
Loss = 4.9717e-03, PNorm = 147.9970, GNorm = 0.2857, lr_0 = 3.3748e-04
Loss = 5.2814e-03, PNorm = 148.0091, GNorm = 0.3786, lr_0 = 3.3725e-04
Loss = 4.5542e-03, PNorm = 148.0220, GNorm = 0.1456, lr_0 = 3.3701e-04
Loss = 4.2950e-03, PNorm = 148.0338, GNorm = 0.3834, lr_0 = 3.3678e-04
Loss = 4.0043e-03, PNorm = 148.0443, GNorm = 0.1960, lr_0 = 3.3655e-04
Loss = 4.8957e-03, PNorm = 148.0515, GNorm = 0.0904, lr_0 = 3.3632e-04
Loss = 4.2138e-03, PNorm = 148.0604, GNorm = 0.1285, lr_0 = 3.3609e-04
Loss = 6.2014e-03, PNorm = 148.0724, GNorm = 0.4239, lr_0 = 3.3586e-04
Loss = 6.1925e-03, PNorm = 148.0822, GNorm = 0.0867, lr_0 = 3.3563e-04
Loss = 6.3451e-03, PNorm = 148.0926, GNorm = 0.2644, lr_0 = 3.3540e-04
Loss = 7.9741e-03, PNorm = 148.1024, GNorm = 0.1803, lr_0 = 3.3517e-04
Loss = 7.1396e-03, PNorm = 148.1147, GNorm = 0.1918, lr_0 = 3.3494e-04
Loss = 4.9814e-03, PNorm = 148.1238, GNorm = 0.2083, lr_0 = 3.3471e-04
Loss = 5.5864e-03, PNorm = 148.1385, GNorm = 0.1386, lr_0 = 3.3448e-04
Loss = 6.6152e-03, PNorm = 148.1517, GNorm = 0.1775, lr_0 = 3.3425e-04
Loss = 4.5929e-03, PNorm = 148.1609, GNorm = 0.1825, lr_0 = 3.3403e-04
Loss = 5.7922e-03, PNorm = 148.1689, GNorm = 0.4380, lr_0 = 3.3380e-04
Loss = 4.5955e-03, PNorm = 148.1800, GNorm = 0.0700, lr_0 = 3.3357e-04
Loss = 5.1797e-03, PNorm = 148.1919, GNorm = 0.1752, lr_0 = 3.3334e-04
Loss = 5.8906e-03, PNorm = 148.2015, GNorm = 0.1420, lr_0 = 3.3311e-04
Loss = 6.4072e-03, PNorm = 148.2159, GNorm = 0.2047, lr_0 = 3.3288e-04
Loss = 6.2890e-03, PNorm = 148.2287, GNorm = 0.3542, lr_0 = 3.3265e-04
Loss = 4.1655e-03, PNorm = 148.2454, GNorm = 0.2115, lr_0 = 3.3243e-04
Loss = 5.4977e-03, PNorm = 148.2552, GNorm = 0.0929, lr_0 = 3.3220e-04
Loss = 4.6390e-03, PNorm = 148.2631, GNorm = 0.1519, lr_0 = 3.3197e-04
Loss = 5.4839e-03, PNorm = 148.2708, GNorm = 0.0963, lr_0 = 3.3174e-04
Loss = 4.2399e-03, PNorm = 148.2817, GNorm = 0.0827, lr_0 = 3.3152e-04
Loss = 4.5849e-03, PNorm = 148.2934, GNorm = 0.2060, lr_0 = 3.3129e-04
Loss = 5.6637e-03, PNorm = 148.3058, GNorm = 0.4161, lr_0 = 3.3106e-04
Loss = 5.9520e-03, PNorm = 148.3189, GNorm = 0.1619, lr_0 = 3.3084e-04
Loss = 5.1223e-03, PNorm = 148.3360, GNorm = 0.1995, lr_0 = 3.3061e-04
Loss = 7.2814e-03, PNorm = 148.3503, GNorm = 0.1007, lr_0 = 3.3038e-04
Loss = 6.1512e-03, PNorm = 148.3640, GNorm = 0.1253, lr_0 = 3.3016e-04
Loss = 4.6851e-03, PNorm = 148.3726, GNorm = 0.1941, lr_0 = 3.2993e-04
Loss = 6.2881e-03, PNorm = 148.3862, GNorm = 0.3520, lr_0 = 3.2970e-04
Loss = 5.3677e-03, PNorm = 148.4005, GNorm = 0.3279, lr_0 = 3.2948e-04
Loss = 5.1528e-03, PNorm = 148.4098, GNorm = 0.3507, lr_0 = 3.2925e-04
Loss = 4.8088e-03, PNorm = 148.4201, GNorm = 0.1643, lr_0 = 3.2903e-04
Loss = 5.1850e-03, PNorm = 148.4298, GNorm = 0.2693, lr_0 = 3.2880e-04
Loss = 4.8609e-03, PNorm = 148.4437, GNorm = 0.3466, lr_0 = 3.2858e-04
Loss = 7.0779e-03, PNorm = 148.4511, GNorm = 0.0619, lr_0 = 3.2835e-04
Loss = 4.5224e-03, PNorm = 148.4584, GNorm = 0.2158, lr_0 = 3.2813e-04
Loss = 6.7305e-03, PNorm = 148.4739, GNorm = 0.3964, lr_0 = 3.2790e-04
Loss = 4.3022e-03, PNorm = 148.4912, GNorm = 0.3903, lr_0 = 3.2768e-04
Loss = 5.0501e-03, PNorm = 148.5049, GNorm = 0.4866, lr_0 = 3.2745e-04
Loss = 5.7674e-03, PNorm = 148.5178, GNorm = 0.2432, lr_0 = 3.2723e-04
Loss = 5.2937e-03, PNorm = 148.5306, GNorm = 0.1423, lr_0 = 3.2700e-04
Loss = 5.4887e-03, PNorm = 148.5376, GNorm = 0.0680, lr_0 = 3.2678e-04
Loss = 5.0259e-03, PNorm = 148.5513, GNorm = 0.1308, lr_0 = 3.2656e-04
Loss = 1.0276e-02, PNorm = 148.5636, GNorm = 0.3131, lr_0 = 3.2633e-04
Loss = 4.6999e-03, PNorm = 148.5756, GNorm = 0.2639, lr_0 = 3.2611e-04
Loss = 4.8732e-03, PNorm = 148.5903, GNorm = 0.2934, lr_0 = 3.2589e-04
Loss = 7.2718e-03, PNorm = 148.6070, GNorm = 0.2256, lr_0 = 3.2566e-04
Loss = 4.8731e-03, PNorm = 148.6209, GNorm = 0.0831, lr_0 = 3.2544e-04
Loss = 5.4438e-03, PNorm = 148.6319, GNorm = 0.0567, lr_0 = 3.2522e-04
Loss = 4.4726e-03, PNorm = 148.6418, GNorm = 0.3684, lr_0 = 3.2499e-04
Loss = 5.0027e-03, PNorm = 148.6512, GNorm = 0.1092, lr_0 = 3.2477e-04
Loss = 7.6089e-03, PNorm = 148.6647, GNorm = 0.0904, lr_0 = 3.2455e-04
Loss = 4.6290e-03, PNorm = 148.6734, GNorm = 0.2738, lr_0 = 3.2433e-04
Loss = 4.1366e-03, PNorm = 148.6830, GNorm = 0.0649, lr_0 = 3.2410e-04
Loss = 4.7293e-03, PNorm = 148.6974, GNorm = 0.2558, lr_0 = 3.2388e-04
Loss = 4.8286e-03, PNorm = 148.7085, GNorm = 0.4806, lr_0 = 3.2366e-04
Loss = 6.8574e-03, PNorm = 148.7206, GNorm = 0.1825, lr_0 = 3.2344e-04
Loss = 4.5485e-03, PNorm = 148.7353, GNorm = 0.1584, lr_0 = 3.2322e-04
Loss = 5.2767e-03, PNorm = 148.7431, GNorm = 0.4164, lr_0 = 3.2300e-04
Loss = 4.2423e-03, PNorm = 148.7511, GNorm = 0.1470, lr_0 = 3.2277e-04
Loss = 4.5609e-03, PNorm = 148.7579, GNorm = 0.1069, lr_0 = 3.2255e-04
Loss = 4.8737e-03, PNorm = 148.7670, GNorm = 0.1597, lr_0 = 3.2233e-04
Loss = 7.7428e-03, PNorm = 148.7801, GNorm = 0.1416, lr_0 = 3.2211e-04
Loss = 4.9036e-03, PNorm = 148.7905, GNorm = 0.3075, lr_0 = 3.2189e-04
Loss = 5.6926e-03, PNorm = 148.8008, GNorm = 0.1178, lr_0 = 3.2167e-04
Loss = 4.3946e-03, PNorm = 148.8178, GNorm = 0.1451, lr_0 = 3.2145e-04
Loss = 4.7052e-03, PNorm = 148.8319, GNorm = 0.2310, lr_0 = 3.2123e-04
Loss = 3.8589e-03, PNorm = 148.8454, GNorm = 0.2391, lr_0 = 3.2101e-04
Loss = 4.6988e-03, PNorm = 148.8557, GNorm = 0.3197, lr_0 = 3.2079e-04
Loss = 5.1783e-03, PNorm = 148.8669, GNorm = 0.0948, lr_0 = 3.2057e-04
Loss = 4.5361e-03, PNorm = 148.8798, GNorm = 0.2681, lr_0 = 3.2035e-04
Loss = 5.4271e-03, PNorm = 148.8871, GNorm = 0.0940, lr_0 = 3.2013e-04
Loss = 6.2028e-03, PNorm = 148.9002, GNorm = 0.2017, lr_0 = 3.1991e-04
Loss = 6.8924e-03, PNorm = 148.9131, GNorm = 0.2974, lr_0 = 3.1969e-04
Loss = 5.5998e-03, PNorm = 148.9244, GNorm = 0.2833, lr_0 = 3.1947e-04
Loss = 5.9089e-03, PNorm = 148.9411, GNorm = 0.1640, lr_0 = 3.1925e-04
Loss = 4.1647e-03, PNorm = 148.9572, GNorm = 0.1723, lr_0 = 3.1904e-04
Loss = 4.3257e-03, PNorm = 148.9675, GNorm = 0.3414, lr_0 = 3.1882e-04
Loss = 6.0321e-03, PNorm = 148.9803, GNorm = 0.1606, lr_0 = 3.1860e-04
Loss = 4.4698e-03, PNorm = 148.9891, GNorm = 0.0931, lr_0 = 3.1838e-04
Loss = 7.5956e-03, PNorm = 148.9997, GNorm = 0.2137, lr_0 = 3.1816e-04
Loss = 5.2273e-03, PNorm = 149.0102, GNorm = 0.0675, lr_0 = 3.1794e-04
Loss = 4.5823e-03, PNorm = 149.0183, GNorm = 0.0891, lr_0 = 3.1773e-04
Loss = 4.8045e-03, PNorm = 149.0292, GNorm = 0.3976, lr_0 = 3.1751e-04
Loss = 5.4839e-03, PNorm = 149.0415, GNorm = 0.2502, lr_0 = 3.1729e-04
Loss = 4.3708e-03, PNorm = 149.0504, GNorm = 0.1408, lr_0 = 3.1707e-04
Loss = 6.8884e-03, PNorm = 149.0636, GNorm = 0.3812, lr_0 = 3.1686e-04
Loss = 4.9302e-03, PNorm = 149.0762, GNorm = 0.2040, lr_0 = 3.1664e-04
Loss = 4.6497e-03, PNorm = 149.0860, GNorm = 0.1954, lr_0 = 3.1642e-04
Loss = 7.5613e-03, PNorm = 149.0983, GNorm = 0.2994, lr_0 = 3.1621e-04
Validation mae = 0.478732
Epoch 16
Loss = 4.7550e-03, PNorm = 149.1091, GNorm = 0.2328, lr_0 = 3.1599e-04
Loss = 3.9554e-03, PNorm = 149.1216, GNorm = 0.4113, lr_0 = 3.1577e-04
Loss = 4.0365e-03, PNorm = 149.1336, GNorm = 0.3775, lr_0 = 3.1556e-04
Loss = 4.1036e-03, PNorm = 149.1412, GNorm = 0.1934, lr_0 = 3.1534e-04
Loss = 3.4893e-03, PNorm = 149.1496, GNorm = 0.2716, lr_0 = 3.1512e-04
Loss = 3.7877e-03, PNorm = 149.1575, GNorm = 0.2785, lr_0 = 3.1491e-04
Loss = 4.7945e-03, PNorm = 149.1623, GNorm = 0.2522, lr_0 = 3.1469e-04
Loss = 3.7764e-03, PNorm = 149.1690, GNorm = 0.1953, lr_0 = 3.1448e-04
Loss = 4.2594e-03, PNorm = 149.1769, GNorm = 0.1836, lr_0 = 3.1426e-04
Loss = 8.6873e-03, PNorm = 149.1889, GNorm = 0.2021, lr_0 = 3.1405e-04
Loss = 3.8508e-03, PNorm = 149.1979, GNorm = 0.2419, lr_0 = 3.1383e-04
Loss = 4.1176e-03, PNorm = 149.2071, GNorm = 0.2652, lr_0 = 3.1362e-04
Loss = 4.2269e-03, PNorm = 149.2157, GNorm = 0.3274, lr_0 = 3.1340e-04
Loss = 4.7881e-03, PNorm = 149.2206, GNorm = 0.4336, lr_0 = 3.1319e-04
Loss = 5.0043e-03, PNorm = 149.2272, GNorm = 0.1698, lr_0 = 3.1297e-04
Loss = 4.0394e-03, PNorm = 149.2373, GNorm = 0.5202, lr_0 = 3.1276e-04
Loss = 5.5506e-03, PNorm = 149.2462, GNorm = 0.1773, lr_0 = 3.1254e-04
Loss = 3.5618e-03, PNorm = 149.2563, GNorm = 0.3632, lr_0 = 3.1233e-04
Loss = 3.9533e-03, PNorm = 149.2662, GNorm = 0.0751, lr_0 = 3.1212e-04
Loss = 4.5065e-03, PNorm = 149.2758, GNorm = 0.3467, lr_0 = 3.1190e-04
Loss = 4.3432e-03, PNorm = 149.2834, GNorm = 0.2716, lr_0 = 3.1169e-04
Loss = 5.0120e-03, PNorm = 149.2934, GNorm = 0.1196, lr_0 = 3.1147e-04
Loss = 3.5912e-03, PNorm = 149.3038, GNorm = 0.0711, lr_0 = 3.1126e-04
Loss = 4.0675e-03, PNorm = 149.3127, GNorm = 0.2076, lr_0 = 3.1105e-04
Loss = 4.8767e-03, PNorm = 149.3192, GNorm = 0.2365, lr_0 = 3.1083e-04
Loss = 4.7357e-03, PNorm = 149.3273, GNorm = 0.0984, lr_0 = 3.1062e-04
Loss = 4.7298e-03, PNorm = 149.3362, GNorm = 0.1372, lr_0 = 3.1041e-04
Loss = 3.9672e-03, PNorm = 149.3433, GNorm = 0.0851, lr_0 = 3.1020e-04
Loss = 3.8945e-03, PNorm = 149.3498, GNorm = 0.2646, lr_0 = 3.0998e-04
Loss = 3.8457e-03, PNorm = 149.3625, GNorm = 0.2196, lr_0 = 3.0977e-04
Loss = 5.0671e-03, PNorm = 149.3752, GNorm = 0.5483, lr_0 = 3.0956e-04
Loss = 4.2768e-03, PNorm = 149.3822, GNorm = 0.0968, lr_0 = 3.0935e-04
Loss = 3.9491e-03, PNorm = 149.3871, GNorm = 0.1350, lr_0 = 3.0914e-04
Loss = 4.5023e-03, PNorm = 149.3949, GNorm = 0.0755, lr_0 = 3.0892e-04
Loss = 4.7534e-03, PNorm = 149.4048, GNorm = 0.0940, lr_0 = 3.0871e-04
Loss = 5.9811e-03, PNorm = 149.4150, GNorm = 0.3799, lr_0 = 3.0850e-04
Loss = 4.7166e-03, PNorm = 149.4240, GNorm = 0.2422, lr_0 = 3.0829e-04
Loss = 4.4360e-03, PNorm = 149.4351, GNorm = 0.0529, lr_0 = 3.0808e-04
Loss = 4.0258e-03, PNorm = 149.4453, GNorm = 0.0774, lr_0 = 3.0787e-04
Loss = 4.5234e-03, PNorm = 149.4523, GNorm = 0.1931, lr_0 = 3.0766e-04
Loss = 3.8897e-03, PNorm = 149.4602, GNorm = 0.2859, lr_0 = 3.0745e-04
Loss = 4.2568e-03, PNorm = 149.4700, GNorm = 0.1013, lr_0 = 3.0723e-04
Loss = 4.0586e-03, PNorm = 149.4767, GNorm = 0.1306, lr_0 = 3.0702e-04
Loss = 4.0135e-03, PNorm = 149.4878, GNorm = 0.1908, lr_0 = 3.0681e-04
Loss = 4.1095e-03, PNorm = 149.4995, GNorm = 0.1458, lr_0 = 3.0660e-04
Loss = 3.8095e-03, PNorm = 149.5082, GNorm = 0.2249, lr_0 = 3.0639e-04
Loss = 5.0191e-03, PNorm = 149.5175, GNorm = 0.1040, lr_0 = 3.0618e-04
Loss = 4.0871e-03, PNorm = 149.5279, GNorm = 0.3298, lr_0 = 3.0597e-04
Loss = 5.8330e-03, PNorm = 149.5367, GNorm = 0.2719, lr_0 = 3.0576e-04
Loss = 3.9287e-03, PNorm = 149.5457, GNorm = 0.2008, lr_0 = 3.0555e-04
Loss = 3.7281e-03, PNorm = 149.5522, GNorm = 0.1261, lr_0 = 3.0535e-04
Loss = 4.7212e-03, PNorm = 149.5655, GNorm = 0.2283, lr_0 = 3.0514e-04
Loss = 4.1451e-03, PNorm = 149.5758, GNorm = 0.1314, lr_0 = 3.0493e-04
Loss = 4.1605e-03, PNorm = 149.5858, GNorm = 0.0499, lr_0 = 3.0472e-04
Loss = 4.6533e-03, PNorm = 149.5975, GNorm = 0.1756, lr_0 = 3.0451e-04
Loss = 6.2502e-03, PNorm = 149.6102, GNorm = 0.3118, lr_0 = 3.0430e-04
Loss = 5.5443e-03, PNorm = 149.6218, GNorm = 0.3407, lr_0 = 3.0409e-04
Loss = 5.1298e-03, PNorm = 149.6285, GNorm = 0.1963, lr_0 = 3.0388e-04
Loss = 4.4065e-03, PNorm = 149.6373, GNorm = 0.1286, lr_0 = 3.0368e-04
Loss = 4.8456e-03, PNorm = 149.6515, GNorm = 0.1018, lr_0 = 3.0347e-04
Loss = 4.3435e-03, PNorm = 149.6639, GNorm = 0.2421, lr_0 = 3.0326e-04
Loss = 3.4601e-03, PNorm = 149.6743, GNorm = 0.2300, lr_0 = 3.0305e-04
Loss = 3.3907e-03, PNorm = 149.6830, GNorm = 0.0656, lr_0 = 3.0284e-04
Loss = 4.8155e-03, PNorm = 149.6955, GNorm = 0.1699, lr_0 = 3.0264e-04
Loss = 4.1032e-03, PNorm = 149.7093, GNorm = 0.0685, lr_0 = 3.0243e-04
Loss = 4.6302e-03, PNorm = 149.7208, GNorm = 0.1267, lr_0 = 3.0222e-04
Loss = 3.8798e-03, PNorm = 149.7311, GNorm = 0.0868, lr_0 = 3.0202e-04
Loss = 3.5709e-03, PNorm = 149.7399, GNorm = 0.1203, lr_0 = 3.0181e-04
Loss = 3.6377e-03, PNorm = 149.7477, GNorm = 0.0786, lr_0 = 3.0160e-04
Loss = 4.1487e-03, PNorm = 149.7582, GNorm = 0.1258, lr_0 = 3.0140e-04
Loss = 4.9740e-03, PNorm = 149.7689, GNorm = 0.4281, lr_0 = 3.0119e-04
Loss = 4.2424e-03, PNorm = 149.7796, GNorm = 0.0463, lr_0 = 3.0098e-04
Loss = 4.1913e-03, PNorm = 149.7916, GNorm = 0.1289, lr_0 = 3.0078e-04
Loss = 6.0013e-03, PNorm = 149.8057, GNorm = 0.5151, lr_0 = 3.0057e-04
Loss = 6.0984e-03, PNorm = 149.8166, GNorm = 0.3596, lr_0 = 3.0036e-04
Loss = 4.6696e-03, PNorm = 149.8250, GNorm = 0.3896, lr_0 = 3.0016e-04
Loss = 5.9923e-03, PNorm = 149.8334, GNorm = 0.2791, lr_0 = 2.9995e-04
Loss = 4.6222e-03, PNorm = 149.8421, GNorm = 0.2741, lr_0 = 2.9975e-04
Loss = 4.4584e-03, PNorm = 149.8543, GNorm = 0.1456, lr_0 = 2.9954e-04
Loss = 3.9736e-03, PNorm = 149.8681, GNorm = 0.1454, lr_0 = 2.9934e-04
Loss = 4.2938e-03, PNorm = 149.8791, GNorm = 0.1949, lr_0 = 2.9913e-04
Loss = 6.2914e-03, PNorm = 149.8857, GNorm = 0.2185, lr_0 = 2.9893e-04
Loss = 3.8353e-03, PNorm = 149.8948, GNorm = 0.3996, lr_0 = 2.9872e-04
Loss = 4.8834e-03, PNorm = 149.9044, GNorm = 0.1631, lr_0 = 2.9852e-04
Loss = 3.5490e-03, PNorm = 149.9150, GNorm = 0.1800, lr_0 = 2.9831e-04
Loss = 3.6386e-03, PNorm = 149.9244, GNorm = 0.1065, lr_0 = 2.9811e-04
Loss = 3.9522e-03, PNorm = 149.9335, GNorm = 0.1863, lr_0 = 2.9790e-04
Loss = 7.5585e-03, PNorm = 149.9465, GNorm = 0.2392, lr_0 = 2.9770e-04
Loss = 3.9725e-03, PNorm = 149.9582, GNorm = 0.1575, lr_0 = 2.9750e-04
Loss = 4.0657e-03, PNorm = 149.9703, GNorm = 0.4492, lr_0 = 2.9729e-04
Loss = 5.5673e-03, PNorm = 149.9768, GNorm = 0.1127, lr_0 = 2.9709e-04
Loss = 4.9892e-03, PNorm = 149.9812, GNorm = 0.1494, lr_0 = 2.9689e-04
Loss = 4.4893e-03, PNorm = 149.9894, GNorm = 0.0792, lr_0 = 2.9668e-04
Loss = 5.4543e-03, PNorm = 149.9977, GNorm = 0.1918, lr_0 = 2.9648e-04
Loss = 8.5355e-03, PNorm = 150.0092, GNorm = 0.2150, lr_0 = 2.9628e-04
Loss = 4.2295e-03, PNorm = 150.0184, GNorm = 0.4373, lr_0 = 2.9607e-04
Loss = 5.4784e-03, PNorm = 150.0322, GNorm = 0.1328, lr_0 = 2.9587e-04
Loss = 4.0279e-03, PNorm = 150.0419, GNorm = 0.1661, lr_0 = 2.9567e-04
Loss = 6.3782e-03, PNorm = 150.0513, GNorm = 0.2450, lr_0 = 2.9546e-04
Loss = 3.6568e-03, PNorm = 150.0631, GNorm = 0.1394, lr_0 = 2.9526e-04
Loss = 5.4881e-03, PNorm = 150.0735, GNorm = 0.2400, lr_0 = 2.9506e-04
Loss = 3.8340e-03, PNorm = 150.0816, GNorm = 0.0614, lr_0 = 2.9486e-04
Loss = 5.8058e-03, PNorm = 150.0894, GNorm = 0.2264, lr_0 = 2.9466e-04
Loss = 6.3366e-03, PNorm = 150.1001, GNorm = 0.2356, lr_0 = 2.9445e-04
Loss = 3.7646e-03, PNorm = 150.1113, GNorm = 0.1133, lr_0 = 2.9425e-04
Loss = 4.9316e-03, PNorm = 150.1218, GNorm = 0.3325, lr_0 = 2.9405e-04
Loss = 5.1337e-03, PNorm = 150.1323, GNorm = 0.4918, lr_0 = 2.9385e-04
Loss = 6.8641e-03, PNorm = 150.1461, GNorm = 0.1271, lr_0 = 2.9365e-04
Loss = 3.6930e-03, PNorm = 150.1598, GNorm = 0.2578, lr_0 = 2.9345e-04
Loss = 6.6260e-03, PNorm = 150.1693, GNorm = 0.1913, lr_0 = 2.9325e-04
Loss = 4.1125e-03, PNorm = 150.1788, GNorm = 0.0615, lr_0 = 2.9305e-04
Loss = 5.2386e-03, PNorm = 150.1888, GNorm = 0.3480, lr_0 = 2.9284e-04
Loss = 4.7173e-03, PNorm = 150.2009, GNorm = 0.1709, lr_0 = 2.9264e-04
Loss = 4.4980e-03, PNorm = 150.2132, GNorm = 0.2145, lr_0 = 2.9244e-04
Loss = 4.9946e-03, PNorm = 150.2229, GNorm = 0.2055, lr_0 = 2.9224e-04
Loss = 6.1934e-03, PNorm = 150.2295, GNorm = 0.1690, lr_0 = 2.9204e-04
Loss = 3.4876e-03, PNorm = 150.2389, GNorm = 0.0927, lr_0 = 2.9184e-04
Loss = 3.9657e-03, PNorm = 150.2529, GNorm = 0.4570, lr_0 = 2.9164e-04
Loss = 4.6349e-03, PNorm = 150.2640, GNorm = 0.0676, lr_0 = 2.9144e-04
Loss = 6.5977e-03, PNorm = 150.2722, GNorm = 0.0734, lr_0 = 2.9124e-04
Validation mae = 0.476376
Epoch 17
Loss = 3.7208e-03, PNorm = 150.2788, GNorm = 0.1674, lr_0 = 2.9104e-04
Loss = 5.0368e-03, PNorm = 150.2826, GNorm = 0.0489, lr_0 = 2.9084e-04
Loss = 3.7657e-03, PNorm = 150.2870, GNorm = 0.2899, lr_0 = 2.9065e-04
Loss = 3.9524e-03, PNorm = 150.2933, GNorm = 0.2945, lr_0 = 2.9045e-04
Loss = 3.5327e-03, PNorm = 150.2999, GNorm = 0.1505, lr_0 = 2.9025e-04
Loss = 3.8717e-03, PNorm = 150.3082, GNorm = 0.1977, lr_0 = 2.9005e-04
Loss = 4.7074e-03, PNorm = 150.3183, GNorm = 0.3760, lr_0 = 2.8985e-04
Loss = 3.2299e-03, PNorm = 150.3283, GNorm = 0.2627, lr_0 = 2.8965e-04
Loss = 4.2437e-03, PNorm = 150.3352, GNorm = 0.1382, lr_0 = 2.8945e-04
Loss = 5.1295e-03, PNorm = 150.3443, GNorm = 0.0599, lr_0 = 2.8925e-04
Loss = 4.6070e-03, PNorm = 150.3522, GNorm = 0.2178, lr_0 = 2.8906e-04
Loss = 3.4099e-03, PNorm = 150.3617, GNorm = 0.0781, lr_0 = 2.8886e-04
Loss = 4.1449e-03, PNorm = 150.3685, GNorm = 0.1134, lr_0 = 2.8866e-04
Loss = 3.9198e-03, PNorm = 150.3784, GNorm = 0.1600, lr_0 = 2.8846e-04
Loss = 3.7821e-03, PNorm = 150.3865, GNorm = 0.3070, lr_0 = 2.8826e-04
Loss = 3.7529e-03, PNorm = 150.3951, GNorm = 0.3209, lr_0 = 2.8807e-04
Loss = 3.1095e-03, PNorm = 150.4009, GNorm = 0.0818, lr_0 = 2.8787e-04
Loss = 3.5781e-03, PNorm = 150.4084, GNorm = 0.1334, lr_0 = 2.8767e-04
Loss = 3.5515e-03, PNorm = 150.4157, GNorm = 0.2243, lr_0 = 2.8748e-04
Loss = 3.0607e-03, PNorm = 150.4213, GNorm = 0.3277, lr_0 = 2.8728e-04
Loss = 4.3716e-03, PNorm = 150.4286, GNorm = 0.2821, lr_0 = 2.8708e-04
Loss = 3.4791e-03, PNorm = 150.4355, GNorm = 0.3050, lr_0 = 2.8689e-04
Loss = 3.8419e-03, PNorm = 150.4421, GNorm = 0.1464, lr_0 = 2.8669e-04
Loss = 2.9971e-03, PNorm = 150.4520, GNorm = 0.2569, lr_0 = 2.8649e-04
Loss = 3.3489e-03, PNorm = 150.4588, GNorm = 0.1722, lr_0 = 2.8630e-04
Loss = 3.0375e-03, PNorm = 150.4633, GNorm = 0.1294, lr_0 = 2.8610e-04
Loss = 5.0998e-03, PNorm = 150.4677, GNorm = 0.1913, lr_0 = 2.8590e-04
Loss = 3.3166e-03, PNorm = 150.4715, GNorm = 0.0678, lr_0 = 2.8571e-04
Loss = 3.9088e-03, PNorm = 150.4787, GNorm = 0.1343, lr_0 = 2.8551e-04
Loss = 3.2603e-03, PNorm = 150.4853, GNorm = 0.1637, lr_0 = 2.8532e-04
Loss = 3.7150e-03, PNorm = 150.4965, GNorm = 0.1635, lr_0 = 2.8512e-04
Loss = 3.5608e-03, PNorm = 150.5055, GNorm = 0.2013, lr_0 = 2.8493e-04
Loss = 3.2477e-03, PNorm = 150.5145, GNorm = 0.1094, lr_0 = 2.8473e-04
Loss = 3.2467e-03, PNorm = 150.5211, GNorm = 0.0613, lr_0 = 2.8454e-04
Loss = 3.0894e-03, PNorm = 150.5267, GNorm = 0.2574, lr_0 = 2.8434e-04
Loss = 3.4270e-03, PNorm = 150.5336, GNorm = 0.1271, lr_0 = 2.8415e-04
Loss = 4.3620e-03, PNorm = 150.5385, GNorm = 0.2442, lr_0 = 2.8395e-04
Loss = 3.1403e-03, PNorm = 150.5457, GNorm = 0.2006, lr_0 = 2.8376e-04
Loss = 3.1383e-03, PNorm = 150.5519, GNorm = 0.1674, lr_0 = 2.8356e-04
Loss = 3.6437e-03, PNorm = 150.5606, GNorm = 0.2277, lr_0 = 2.8337e-04
Loss = 4.3870e-03, PNorm = 150.5681, GNorm = 0.1656, lr_0 = 2.8317e-04
Loss = 4.2633e-03, PNorm = 150.5769, GNorm = 0.1635, lr_0 = 2.8298e-04
Loss = 4.3387e-03, PNorm = 150.5841, GNorm = 0.2404, lr_0 = 2.8279e-04
Loss = 5.0832e-03, PNorm = 150.5948, GNorm = 0.1532, lr_0 = 2.8259e-04
Loss = 3.6539e-03, PNorm = 150.6027, GNorm = 0.1534, lr_0 = 2.8240e-04
Loss = 5.3497e-03, PNorm = 150.6149, GNorm = 0.2274, lr_0 = 2.8221e-04
Loss = 3.2620e-03, PNorm = 150.6239, GNorm = 0.1789, lr_0 = 2.8201e-04
Loss = 4.5626e-03, PNorm = 150.6318, GNorm = 0.3405, lr_0 = 2.8182e-04
Loss = 5.9818e-03, PNorm = 150.6398, GNorm = 0.2334, lr_0 = 2.8163e-04
Loss = 4.0178e-03, PNorm = 150.6458, GNorm = 0.2824, lr_0 = 2.8143e-04
Loss = 3.7704e-03, PNorm = 150.6546, GNorm = 0.1355, lr_0 = 2.8124e-04
Loss = 3.4758e-03, PNorm = 150.6644, GNorm = 0.2345, lr_0 = 2.8105e-04
Loss = 3.6025e-03, PNorm = 150.6721, GNorm = 0.2326, lr_0 = 2.8085e-04
Loss = 4.1160e-03, PNorm = 150.6768, GNorm = 0.1568, lr_0 = 2.8066e-04
Loss = 8.7109e-03, PNorm = 150.6773, GNorm = 0.5711, lr_0 = 2.8047e-04
Loss = 6.7468e-03, PNorm = 150.6896, GNorm = 0.2439, lr_0 = 2.8028e-04
Loss = 3.6575e-03, PNorm = 150.7033, GNorm = 0.1821, lr_0 = 2.8009e-04
Loss = 3.4810e-03, PNorm = 150.7108, GNorm = 0.2574, lr_0 = 2.7989e-04
Loss = 3.3729e-03, PNorm = 150.7188, GNorm = 0.0989, lr_0 = 2.7970e-04
Loss = 3.2161e-03, PNorm = 150.7317, GNorm = 0.1852, lr_0 = 2.7951e-04
Loss = 4.2063e-03, PNorm = 150.7410, GNorm = 0.2930, lr_0 = 2.7932e-04
Loss = 3.9553e-03, PNorm = 150.7511, GNorm = 0.0898, lr_0 = 2.7913e-04
Loss = 4.7584e-03, PNorm = 150.7570, GNorm = 0.3562, lr_0 = 2.7894e-04
Loss = 4.7660e-03, PNorm = 150.7624, GNorm = 0.2232, lr_0 = 2.7875e-04
Loss = 4.9940e-03, PNorm = 150.7708, GNorm = 0.1368, lr_0 = 2.7855e-04
Loss = 4.8773e-03, PNorm = 150.7772, GNorm = 0.0798, lr_0 = 2.7836e-04
Loss = 2.9134e-03, PNorm = 150.7858, GNorm = 0.1443, lr_0 = 2.7817e-04
Loss = 2.8244e-03, PNorm = 150.7946, GNorm = 0.1795, lr_0 = 2.7798e-04
Loss = 4.6013e-03, PNorm = 150.8002, GNorm = 0.4704, lr_0 = 2.7779e-04
Loss = 6.8011e-03, PNorm = 150.8066, GNorm = 0.1026, lr_0 = 2.7760e-04
Loss = 3.5010e-03, PNorm = 150.8144, GNorm = 0.3197, lr_0 = 2.7741e-04
Loss = 3.1057e-03, PNorm = 150.8237, GNorm = 0.0928, lr_0 = 2.7722e-04
Loss = 3.3429e-03, PNorm = 150.8343, GNorm = 0.2445, lr_0 = 2.7703e-04
Loss = 4.2498e-03, PNorm = 150.8377, GNorm = 0.1892, lr_0 = 2.7684e-04
Loss = 3.8278e-03, PNorm = 150.8439, GNorm = 0.2384, lr_0 = 2.7665e-04
Loss = 3.4771e-03, PNorm = 150.8533, GNorm = 0.3738, lr_0 = 2.7646e-04
Loss = 3.1507e-03, PNorm = 150.8630, GNorm = 0.1975, lr_0 = 2.7627e-04
Loss = 3.6711e-03, PNorm = 150.8694, GNorm = 0.3843, lr_0 = 2.7608e-04
Loss = 6.1270e-03, PNorm = 150.8762, GNorm = 0.1992, lr_0 = 2.7590e-04
Loss = 3.6814e-03, PNorm = 150.8835, GNorm = 0.1655, lr_0 = 2.7571e-04
Loss = 3.7950e-03, PNorm = 150.8908, GNorm = 0.2160, lr_0 = 2.7552e-04
Loss = 5.5709e-03, PNorm = 150.9010, GNorm = 0.0941, lr_0 = 2.7533e-04
Loss = 3.3601e-03, PNorm = 150.9091, GNorm = 0.1933, lr_0 = 2.7514e-04
Loss = 3.4711e-03, PNorm = 150.9163, GNorm = 0.0753, lr_0 = 2.7495e-04
Loss = 2.8426e-03, PNorm = 150.9227, GNorm = 0.0851, lr_0 = 2.7476e-04
Loss = 2.9915e-03, PNorm = 150.9305, GNorm = 0.1117, lr_0 = 2.7457e-04
Loss = 4.0628e-03, PNorm = 150.9372, GNorm = 0.2252, lr_0 = 2.7439e-04
Loss = 4.5520e-03, PNorm = 150.9450, GNorm = 0.1830, lr_0 = 2.7420e-04
Loss = 3.2303e-03, PNorm = 150.9544, GNorm = 0.1548, lr_0 = 2.7401e-04
Loss = 3.7501e-03, PNorm = 150.9648, GNorm = 0.1894, lr_0 = 2.7382e-04
Loss = 4.2080e-03, PNorm = 150.9749, GNorm = 0.2339, lr_0 = 2.7364e-04
Loss = 3.5803e-03, PNorm = 150.9865, GNorm = 0.0769, lr_0 = 2.7345e-04
Loss = 3.6205e-03, PNorm = 150.9927, GNorm = 0.0595, lr_0 = 2.7326e-04
Loss = 3.6033e-03, PNorm = 151.0014, GNorm = 0.1062, lr_0 = 2.7307e-04
Loss = 3.4769e-03, PNorm = 151.0106, GNorm = 0.1713, lr_0 = 2.7289e-04
Loss = 3.8840e-03, PNorm = 151.0190, GNorm = 0.1150, lr_0 = 2.7270e-04
Loss = 4.0298e-03, PNorm = 151.0248, GNorm = 0.1595, lr_0 = 2.7251e-04
Loss = 5.1165e-03, PNorm = 151.0298, GNorm = 0.1236, lr_0 = 2.7233e-04
Loss = 4.0986e-03, PNorm = 151.0352, GNorm = 0.0700, lr_0 = 2.7214e-04
Loss = 4.4898e-03, PNorm = 151.0436, GNorm = 0.1268, lr_0 = 2.7195e-04
Loss = 3.6040e-03, PNorm = 151.0500, GNorm = 0.1881, lr_0 = 2.7177e-04
Loss = 5.2963e-03, PNorm = 151.0590, GNorm = 0.5773, lr_0 = 2.7158e-04
Loss = 3.9114e-03, PNorm = 151.0689, GNorm = 0.1202, lr_0 = 2.7139e-04
Loss = 4.1671e-03, PNorm = 151.0778, GNorm = 0.1292, lr_0 = 2.7121e-04
Loss = 3.6348e-03, PNorm = 151.0846, GNorm = 0.5625, lr_0 = 2.7102e-04
Loss = 3.5745e-03, PNorm = 151.0929, GNorm = 0.5442, lr_0 = 2.7084e-04
Loss = 5.8037e-03, PNorm = 151.0985, GNorm = 0.2294, lr_0 = 2.7065e-04
Loss = 3.8563e-03, PNorm = 151.1117, GNorm = 0.2916, lr_0 = 2.7047e-04
Loss = 7.8662e-03, PNorm = 151.1221, GNorm = 0.0974, lr_0 = 2.7028e-04
Loss = 4.1867e-03, PNorm = 151.1291, GNorm = 0.3183, lr_0 = 2.7010e-04
Loss = 3.9809e-03, PNorm = 151.1382, GNorm = 0.3323, lr_0 = 2.6991e-04
Loss = 5.2409e-03, PNorm = 151.1487, GNorm = 0.2682, lr_0 = 2.6973e-04
Loss = 4.9693e-03, PNorm = 151.1596, GNorm = 0.1897, lr_0 = 2.6954e-04
Loss = 4.5185e-03, PNorm = 151.1679, GNorm = 0.0666, lr_0 = 2.6936e-04
Loss = 2.7819e-03, PNorm = 151.1769, GNorm = 0.3785, lr_0 = 2.6917e-04
Loss = 4.1977e-03, PNorm = 151.1865, GNorm = 0.2171, lr_0 = 2.6899e-04
Loss = 3.3825e-03, PNorm = 151.1964, GNorm = 0.3336, lr_0 = 2.6880e-04
Loss = 4.9386e-03, PNorm = 151.2062, GNorm = 0.4244, lr_0 = 2.6862e-04
Loss = 4.1347e-03, PNorm = 151.2142, GNorm = 0.4218, lr_0 = 2.6844e-04
Loss = 3.3100e-03, PNorm = 151.2232, GNorm = 0.0923, lr_0 = 2.6825e-04
Validation mae = 0.476048
Epoch 18
Loss = 3.4167e-03, PNorm = 151.2293, GNorm = 0.1129, lr_0 = 2.6807e-04
Loss = 2.9343e-03, PNorm = 151.2335, GNorm = 0.2362, lr_0 = 2.6788e-04
Loss = 3.0636e-03, PNorm = 151.2400, GNorm = 0.1455, lr_0 = 2.6770e-04
Loss = 3.3400e-03, PNorm = 151.2449, GNorm = 0.3490, lr_0 = 2.6752e-04
Loss = 3.8831e-03, PNorm = 151.2498, GNorm = 0.2582, lr_0 = 2.6733e-04
Loss = 3.2675e-03, PNorm = 151.2546, GNorm = 0.0850, lr_0 = 2.6715e-04
Loss = 3.1289e-03, PNorm = 151.2595, GNorm = 0.2860, lr_0 = 2.6697e-04
Loss = 3.7837e-03, PNorm = 151.2653, GNorm = 0.0697, lr_0 = 2.6678e-04
Loss = 2.9153e-03, PNorm = 151.2712, GNorm = 0.1042, lr_0 = 2.6660e-04
Loss = 3.2931e-03, PNorm = 151.2764, GNorm = 0.2525, lr_0 = 2.6642e-04
Loss = 2.6093e-03, PNorm = 151.2830, GNorm = 0.1435, lr_0 = 2.6624e-04
Loss = 3.0173e-03, PNorm = 151.2893, GNorm = 0.1280, lr_0 = 2.6605e-04
Loss = 3.1305e-03, PNorm = 151.2958, GNorm = 0.2704, lr_0 = 2.6587e-04
Loss = 2.9073e-03, PNorm = 151.3032, GNorm = 0.1266, lr_0 = 2.6569e-04
Loss = 4.8075e-03, PNorm = 151.3089, GNorm = 0.6057, lr_0 = 2.6551e-04
Loss = 4.4330e-03, PNorm = 151.3117, GNorm = 0.3065, lr_0 = 2.6533e-04
Loss = 3.2780e-03, PNorm = 151.3173, GNorm = 0.2483, lr_0 = 2.6514e-04
Loss = 3.6583e-03, PNorm = 151.3261, GNorm = 0.1521, lr_0 = 2.6496e-04
Loss = 4.1706e-03, PNorm = 151.3337, GNorm = 0.1452, lr_0 = 2.6478e-04
Loss = 3.3876e-03, PNorm = 151.3433, GNorm = 0.3027, lr_0 = 2.6460e-04
Loss = 4.6977e-03, PNorm = 151.3474, GNorm = 0.3741, lr_0 = 2.6442e-04
Loss = 2.6751e-03, PNorm = 151.3555, GNorm = 0.0737, lr_0 = 2.6424e-04
Loss = 5.1528e-03, PNorm = 151.3606, GNorm = 0.1359, lr_0 = 2.6406e-04
Loss = 2.6970e-03, PNorm = 151.3660, GNorm = 0.1190, lr_0 = 2.6388e-04
Loss = 3.2584e-03, PNorm = 151.3732, GNorm = 0.1797, lr_0 = 2.6369e-04
Loss = 2.7610e-03, PNorm = 151.3788, GNorm = 0.1650, lr_0 = 2.6351e-04
Loss = 2.3887e-03, PNorm = 151.3837, GNorm = 0.1185, lr_0 = 2.6333e-04
Loss = 3.6106e-03, PNorm = 151.3882, GNorm = 0.0748, lr_0 = 2.6315e-04
Loss = 2.6082e-03, PNorm = 151.3967, GNorm = 0.1675, lr_0 = 2.6297e-04
Loss = 2.7036e-03, PNorm = 151.4043, GNorm = 0.1133, lr_0 = 2.6279e-04
Loss = 2.5518e-03, PNorm = 151.4111, GNorm = 0.1539, lr_0 = 2.6261e-04
Loss = 4.7288e-03, PNorm = 151.4167, GNorm = 0.3088, lr_0 = 2.6243e-04
Loss = 3.3866e-03, PNorm = 151.4217, GNorm = 0.0613, lr_0 = 2.6225e-04
Loss = 3.3623e-03, PNorm = 151.4277, GNorm = 0.1423, lr_0 = 2.6207e-04
Loss = 3.4781e-03, PNorm = 151.4352, GNorm = 0.0660, lr_0 = 2.6189e-04
Loss = 2.7563e-03, PNorm = 151.4438, GNorm = 0.1882, lr_0 = 2.6171e-04
Loss = 2.6410e-03, PNorm = 151.4520, GNorm = 0.1626, lr_0 = 2.6153e-04
Loss = 2.8396e-03, PNorm = 151.4593, GNorm = 0.1713, lr_0 = 2.6136e-04
Loss = 6.0208e-03, PNorm = 151.4701, GNorm = 0.1642, lr_0 = 2.6118e-04
Loss = 2.3212e-03, PNorm = 151.4765, GNorm = 0.0991, lr_0 = 2.6100e-04
Loss = 3.0575e-03, PNorm = 151.4817, GNorm = 0.3029, lr_0 = 2.6082e-04
Loss = 2.9268e-03, PNorm = 151.4876, GNorm = 0.2116, lr_0 = 2.6064e-04
Loss = 2.7001e-03, PNorm = 151.4953, GNorm = 0.1123, lr_0 = 2.6046e-04
Loss = 4.0987e-03, PNorm = 151.5012, GNorm = 0.1202, lr_0 = 2.6028e-04
Loss = 4.2156e-03, PNorm = 151.5073, GNorm = 0.2323, lr_0 = 2.6011e-04
Loss = 3.1969e-03, PNorm = 151.5127, GNorm = 0.1397, lr_0 = 2.5993e-04
Loss = 2.8816e-03, PNorm = 151.5221, GNorm = 0.0695, lr_0 = 2.5975e-04
Loss = 3.2312e-03, PNorm = 151.5296, GNorm = 0.1686, lr_0 = 2.5957e-04
Loss = 4.6617e-03, PNorm = 151.5366, GNorm = 0.0987, lr_0 = 2.5939e-04
Loss = 4.6701e-03, PNorm = 151.5452, GNorm = 0.1278, lr_0 = 2.5922e-04
Loss = 3.7432e-03, PNorm = 151.5502, GNorm = 0.1058, lr_0 = 2.5904e-04
Loss = 2.4286e-03, PNorm = 151.5542, GNorm = 0.2687, lr_0 = 2.5886e-04
Loss = 3.0936e-03, PNorm = 151.5596, GNorm = 0.1101, lr_0 = 2.5868e-04
Loss = 4.3352e-03, PNorm = 151.5661, GNorm = 0.1641, lr_0 = 2.5851e-04
Loss = 4.6827e-03, PNorm = 151.5742, GNorm = 0.1632, lr_0 = 2.5833e-04
Loss = 3.3476e-03, PNorm = 151.5821, GNorm = 0.1428, lr_0 = 2.5815e-04
Loss = 5.6243e-03, PNorm = 151.5892, GNorm = 0.2756, lr_0 = 2.5797e-04
Loss = 3.8131e-03, PNorm = 151.5938, GNorm = 0.2295, lr_0 = 2.5780e-04
Loss = 2.5931e-03, PNorm = 151.6012, GNorm = 0.0598, lr_0 = 2.5762e-04
Loss = 3.0583e-03, PNorm = 151.6067, GNorm = 0.2002, lr_0 = 2.5745e-04
Loss = 2.4175e-03, PNorm = 151.6145, GNorm = 0.0869, lr_0 = 2.5727e-04
Loss = 3.5833e-03, PNorm = 151.6203, GNorm = 0.3356, lr_0 = 2.5709e-04
Loss = 3.6417e-03, PNorm = 151.6254, GNorm = 0.2037, lr_0 = 2.5692e-04
Loss = 2.6633e-03, PNorm = 151.6329, GNorm = 0.1009, lr_0 = 2.5674e-04
Loss = 4.5136e-03, PNorm = 151.6430, GNorm = 0.3787, lr_0 = 2.5656e-04
Loss = 2.7096e-03, PNorm = 151.6510, GNorm = 0.1029, lr_0 = 2.5639e-04
Loss = 3.4098e-03, PNorm = 151.6574, GNorm = 0.1623, lr_0 = 2.5621e-04
Loss = 2.8434e-03, PNorm = 151.6606, GNorm = 0.1021, lr_0 = 2.5604e-04
Loss = 3.2843e-03, PNorm = 151.6645, GNorm = 0.1938, lr_0 = 2.5586e-04
Loss = 4.3065e-03, PNorm = 151.6710, GNorm = 0.3098, lr_0 = 2.5569e-04
Loss = 3.7792e-03, PNorm = 151.6774, GNorm = 0.1618, lr_0 = 2.5551e-04
Loss = 3.0640e-03, PNorm = 151.6848, GNorm = 0.0872, lr_0 = 2.5534e-04
Loss = 4.2441e-03, PNorm = 151.6919, GNorm = 0.0847, lr_0 = 2.5516e-04
Loss = 2.4284e-03, PNorm = 151.7008, GNorm = 0.0494, lr_0 = 2.5499e-04
Loss = 3.7650e-03, PNorm = 151.7102, GNorm = 0.0863, lr_0 = 2.5481e-04
Loss = 2.7769e-03, PNorm = 151.7147, GNorm = 0.1459, lr_0 = 2.5464e-04
Loss = 5.4161e-03, PNorm = 151.7194, GNorm = 0.1655, lr_0 = 2.5446e-04
Loss = 3.3721e-03, PNorm = 151.7247, GNorm = 0.1241, lr_0 = 2.5429e-04
Loss = 6.6759e-03, PNorm = 151.7286, GNorm = 0.1632, lr_0 = 2.5411e-04
Loss = 2.9067e-03, PNorm = 151.7312, GNorm = 0.0805, lr_0 = 2.5394e-04
Loss = 2.9323e-03, PNorm = 151.7369, GNorm = 0.2031, lr_0 = 2.5377e-04
Loss = 3.2795e-03, PNorm = 151.7439, GNorm = 0.1272, lr_0 = 2.5359e-04
Loss = 3.4353e-03, PNorm = 151.7511, GNorm = 0.0574, lr_0 = 2.5342e-04
Loss = 2.5004e-03, PNorm = 151.7594, GNorm = 0.1474, lr_0 = 2.5325e-04
Loss = 2.7925e-03, PNorm = 151.7716, GNorm = 0.2308, lr_0 = 2.5307e-04
Loss = 2.7166e-03, PNorm = 151.7817, GNorm = 0.0834, lr_0 = 2.5290e-04
Loss = 3.1340e-03, PNorm = 151.7881, GNorm = 0.1438, lr_0 = 2.5273e-04
Loss = 2.9611e-03, PNorm = 151.7926, GNorm = 0.2848, lr_0 = 2.5255e-04
Loss = 4.8706e-03, PNorm = 151.7958, GNorm = 0.1458, lr_0 = 2.5238e-04
Loss = 3.6100e-03, PNorm = 151.8023, GNorm = 0.0717, lr_0 = 2.5221e-04
Loss = 2.7077e-03, PNorm = 151.8089, GNorm = 0.0991, lr_0 = 2.5203e-04
Loss = 2.9598e-03, PNorm = 151.8135, GNorm = 0.1079, lr_0 = 2.5186e-04
Loss = 2.5501e-03, PNorm = 151.8199, GNorm = 0.0397, lr_0 = 2.5169e-04
Loss = 4.5014e-03, PNorm = 151.8296, GNorm = 0.1905, lr_0 = 2.5152e-04
Loss = 4.0477e-03, PNorm = 151.8387, GNorm = 0.1812, lr_0 = 2.5134e-04
Loss = 2.7354e-03, PNorm = 151.8449, GNorm = 0.1936, lr_0 = 2.5117e-04
Loss = 3.3282e-03, PNorm = 151.8499, GNorm = 0.0599, lr_0 = 2.5100e-04
Loss = 3.8411e-03, PNorm = 151.8568, GNorm = 0.1251, lr_0 = 2.5083e-04
Loss = 2.5133e-03, PNorm = 151.8622, GNorm = 0.0948, lr_0 = 2.5066e-04
Loss = 2.6623e-03, PNorm = 151.8697, GNorm = 0.1662, lr_0 = 2.5048e-04
Loss = 3.7971e-03, PNorm = 151.8769, GNorm = 0.0940, lr_0 = 2.5031e-04
Loss = 2.7764e-03, PNorm = 151.8841, GNorm = 0.1341, lr_0 = 2.5014e-04
Loss = 3.7398e-03, PNorm = 151.8900, GNorm = 0.2143, lr_0 = 2.4997e-04
Loss = 2.3310e-03, PNorm = 151.8978, GNorm = 0.0628, lr_0 = 2.4980e-04
Loss = 3.4064e-03, PNorm = 151.9070, GNorm = 0.1223, lr_0 = 2.4963e-04
Loss = 4.7544e-03, PNorm = 151.9133, GNorm = 0.1269, lr_0 = 2.4946e-04
Loss = 3.3160e-03, PNorm = 151.9210, GNorm = 0.1211, lr_0 = 2.4929e-04
Loss = 3.1863e-03, PNorm = 151.9315, GNorm = 0.1564, lr_0 = 2.4911e-04
Loss = 3.7827e-03, PNorm = 151.9411, GNorm = 0.0915, lr_0 = 2.4894e-04
Loss = 2.6155e-03, PNorm = 151.9482, GNorm = 0.3666, lr_0 = 2.4877e-04
Loss = 3.7927e-03, PNorm = 151.9528, GNorm = 0.1112, lr_0 = 2.4860e-04
Loss = 3.1278e-03, PNorm = 151.9588, GNorm = 0.2484, lr_0 = 2.4843e-04
Loss = 3.4681e-03, PNorm = 151.9638, GNorm = 0.2244, lr_0 = 2.4826e-04
Loss = 3.2084e-03, PNorm = 151.9700, GNorm = 0.1115, lr_0 = 2.4809e-04
Loss = 2.6339e-03, PNorm = 151.9776, GNorm = 0.0603, lr_0 = 2.4792e-04
Loss = 3.7002e-03, PNorm = 151.9809, GNorm = 0.3697, lr_0 = 2.4775e-04
Loss = 4.3504e-03, PNorm = 151.9860, GNorm = 0.1128, lr_0 = 2.4758e-04
Loss = 2.8211e-03, PNorm = 151.9927, GNorm = 0.1069, lr_0 = 2.4741e-04
Loss = 3.6486e-03, PNorm = 152.0005, GNorm = 0.1069, lr_0 = 2.4724e-04
Loss = 3.4210e-03, PNorm = 152.0095, GNorm = 0.0886, lr_0 = 2.4707e-04
Validation mae = 0.476687
Epoch 19
Loss = 2.5002e-03, PNorm = 152.0161, GNorm = 0.1159, lr_0 = 2.4690e-04
Loss = 2.4368e-03, PNorm = 152.0196, GNorm = 0.0820, lr_0 = 2.4674e-04
Loss = 2.7631e-03, PNorm = 152.0214, GNorm = 0.0548, lr_0 = 2.4657e-04
Loss = 3.1807e-03, PNorm = 152.0260, GNorm = 0.1243, lr_0 = 2.4640e-04
Loss = 2.8943e-03, PNorm = 152.0312, GNorm = 0.3082, lr_0 = 2.4623e-04
Loss = 2.2791e-03, PNorm = 152.0342, GNorm = 0.0692, lr_0 = 2.4606e-04
Loss = 2.9267e-03, PNorm = 152.0385, GNorm = 0.2348, lr_0 = 2.4589e-04
Loss = 2.1749e-03, PNorm = 152.0453, GNorm = 0.1007, lr_0 = 2.4572e-04
Loss = 2.8827e-03, PNorm = 152.0501, GNorm = 0.1036, lr_0 = 2.4556e-04
Loss = 2.9495e-03, PNorm = 152.0542, GNorm = 0.1547, lr_0 = 2.4539e-04
Loss = 2.6334e-03, PNorm = 152.0574, GNorm = 0.2497, lr_0 = 2.4522e-04
Loss = 5.3105e-03, PNorm = 152.0625, GNorm = 0.1787, lr_0 = 2.4505e-04
Loss = 2.0605e-03, PNorm = 152.0693, GNorm = 0.1540, lr_0 = 2.4488e-04
Loss = 2.1444e-03, PNorm = 152.0736, GNorm = 0.1557, lr_0 = 2.4472e-04
Loss = 2.9186e-03, PNorm = 152.0801, GNorm = 0.1115, lr_0 = 2.4455e-04
Loss = 2.4594e-03, PNorm = 152.0860, GNorm = 0.0793, lr_0 = 2.4438e-04
Loss = 2.4436e-03, PNorm = 152.0938, GNorm = 0.1510, lr_0 = 2.4421e-04
Loss = 2.5418e-03, PNorm = 152.0994, GNorm = 0.0555, lr_0 = 2.4405e-04
Loss = 2.3275e-03, PNorm = 152.1062, GNorm = 0.0751, lr_0 = 2.4388e-04
Loss = 2.7249e-03, PNorm = 152.1134, GNorm = 0.0457, lr_0 = 2.4371e-04
Loss = 2.6515e-03, PNorm = 152.1203, GNorm = 0.2265, lr_0 = 2.4354e-04
Loss = 2.7261e-03, PNorm = 152.1222, GNorm = 0.1037, lr_0 = 2.4338e-04
Loss = 2.1085e-03, PNorm = 152.1261, GNorm = 0.1583, lr_0 = 2.4321e-04
Loss = 3.2349e-03, PNorm = 152.1325, GNorm = 0.1690, lr_0 = 2.4304e-04
Loss = 2.4213e-03, PNorm = 152.1388, GNorm = 0.2575, lr_0 = 2.4288e-04
Loss = 2.9886e-03, PNorm = 152.1454, GNorm = 0.0601, lr_0 = 2.4271e-04
Loss = 2.1172e-03, PNorm = 152.1521, GNorm = 0.2104, lr_0 = 2.4254e-04
Loss = 2.4333e-03, PNorm = 152.1584, GNorm = 0.1407, lr_0 = 2.4238e-04
Loss = 2.7564e-03, PNorm = 152.1630, GNorm = 0.2013, lr_0 = 2.4221e-04
Loss = 3.1098e-03, PNorm = 152.1665, GNorm = 0.1682, lr_0 = 2.4205e-04
Loss = 2.4226e-03, PNorm = 152.1727, GNorm = 0.1207, lr_0 = 2.4188e-04
Loss = 4.0370e-03, PNorm = 152.1758, GNorm = 0.1077, lr_0 = 2.4171e-04
Loss = 1.9365e-03, PNorm = 152.1817, GNorm = 0.1078, lr_0 = 2.4155e-04
Loss = 2.8224e-03, PNorm = 152.1853, GNorm = 0.1899, lr_0 = 2.4138e-04
Loss = 3.7017e-03, PNorm = 152.1898, GNorm = 0.1422, lr_0 = 2.4122e-04
Loss = 2.7202e-03, PNorm = 152.1987, GNorm = 0.1625, lr_0 = 2.4105e-04
Loss = 3.8942e-03, PNorm = 152.2055, GNorm = 0.1269, lr_0 = 2.4089e-04
Loss = 2.4743e-03, PNorm = 152.2110, GNorm = 0.1468, lr_0 = 2.4072e-04
Loss = 2.5717e-03, PNorm = 152.2162, GNorm = 0.1035, lr_0 = 2.4056e-04
Loss = 2.2167e-03, PNorm = 152.2202, GNorm = 0.1236, lr_0 = 2.4039e-04
Loss = 3.5023e-03, PNorm = 152.2279, GNorm = 0.2818, lr_0 = 2.4023e-04
Loss = 2.5865e-03, PNorm = 152.2361, GNorm = 0.1076, lr_0 = 2.4006e-04
Loss = 3.9658e-03, PNorm = 152.2446, GNorm = 0.1477, lr_0 = 2.3990e-04
Loss = 2.8931e-03, PNorm = 152.2488, GNorm = 0.4078, lr_0 = 2.3974e-04
Loss = 2.4319e-03, PNorm = 152.2554, GNorm = 0.1792, lr_0 = 2.3957e-04
Loss = 2.8105e-03, PNorm = 152.2611, GNorm = 0.2908, lr_0 = 2.3941e-04
Loss = 2.5112e-03, PNorm = 152.2657, GNorm = 0.0685, lr_0 = 2.3924e-04
Loss = 1.9341e-03, PNorm = 152.2703, GNorm = 0.0780, lr_0 = 2.3908e-04
Loss = 2.1671e-03, PNorm = 152.2737, GNorm = 0.0483, lr_0 = 2.3892e-04
Loss = 2.0865e-03, PNorm = 152.2803, GNorm = 0.2375, lr_0 = 2.3875e-04
Loss = 2.8485e-03, PNorm = 152.2904, GNorm = 0.1115, lr_0 = 2.3859e-04
Loss = 4.2832e-03, PNorm = 152.2969, GNorm = 0.1603, lr_0 = 2.3842e-04
Loss = 2.2548e-03, PNorm = 152.3032, GNorm = 0.1425, lr_0 = 2.3826e-04
Loss = 2.0615e-03, PNorm = 152.3091, GNorm = 0.0900, lr_0 = 2.3810e-04
Loss = 3.9664e-03, PNorm = 152.3162, GNorm = 0.2401, lr_0 = 2.3794e-04
Loss = 4.3952e-03, PNorm = 152.3198, GNorm = 0.3000, lr_0 = 2.3777e-04
Loss = 2.3407e-03, PNorm = 152.3291, GNorm = 0.0607, lr_0 = 2.3761e-04
Loss = 2.0724e-03, PNorm = 152.3358, GNorm = 0.0881, lr_0 = 2.3745e-04
Loss = 2.5418e-03, PNorm = 152.3406, GNorm = 0.2533, lr_0 = 2.3728e-04
Loss = 3.6773e-03, PNorm = 152.3454, GNorm = 0.1179, lr_0 = 2.3712e-04
Loss = 2.7701e-03, PNorm = 152.3518, GNorm = 0.2286, lr_0 = 2.3696e-04
Loss = 3.7595e-03, PNorm = 152.3613, GNorm = 0.0887, lr_0 = 2.3680e-04
Loss = 4.7066e-03, PNorm = 152.3667, GNorm = 0.1113, lr_0 = 2.3663e-04
Loss = 2.2116e-03, PNorm = 152.3714, GNorm = 0.0735, lr_0 = 2.3647e-04
Loss = 2.3239e-03, PNorm = 152.3778, GNorm = 0.2033, lr_0 = 2.3631e-04
Loss = 5.5552e-03, PNorm = 152.3819, GNorm = 0.1678, lr_0 = 2.3615e-04
Loss = 2.4843e-03, PNorm = 152.3895, GNorm = 0.0459, lr_0 = 2.3599e-04
Loss = 2.3550e-03, PNorm = 152.3970, GNorm = 0.0970, lr_0 = 2.3582e-04
Loss = 3.2122e-03, PNorm = 152.4018, GNorm = 0.1045, lr_0 = 2.3566e-04
Loss = 3.2244e-03, PNorm = 152.4078, GNorm = 0.0853, lr_0 = 2.3550e-04
Loss = 2.2148e-03, PNorm = 152.4142, GNorm = 0.2270, lr_0 = 2.3534e-04
Loss = 3.0184e-03, PNorm = 152.4180, GNorm = 0.1241, lr_0 = 2.3518e-04
Loss = 3.4073e-03, PNorm = 152.4246, GNorm = 0.1840, lr_0 = 2.3502e-04
Loss = 2.2614e-03, PNorm = 152.4324, GNorm = 0.4677, lr_0 = 2.3486e-04
Loss = 2.7361e-03, PNorm = 152.4394, GNorm = 0.3598, lr_0 = 2.3470e-04
Loss = 2.2892e-03, PNorm = 152.4489, GNorm = 0.1170, lr_0 = 2.3454e-04
Loss = 2.9985e-03, PNorm = 152.4549, GNorm = 0.2823, lr_0 = 2.3437e-04
Loss = 2.2873e-03, PNorm = 152.4605, GNorm = 0.0487, lr_0 = 2.3421e-04
Loss = 2.0393e-03, PNorm = 152.4646, GNorm = 0.1195, lr_0 = 2.3405e-04
Loss = 4.1584e-03, PNorm = 152.4676, GNorm = 0.1707, lr_0 = 2.3389e-04
Loss = 5.4288e-03, PNorm = 152.4706, GNorm = 0.1100, lr_0 = 2.3373e-04
Loss = 2.4317e-03, PNorm = 152.4753, GNorm = 0.1159, lr_0 = 2.3357e-04
Loss = 2.2931e-03, PNorm = 152.4796, GNorm = 0.1282, lr_0 = 2.3341e-04
Loss = 2.1879e-03, PNorm = 152.4849, GNorm = 0.2800, lr_0 = 2.3325e-04
Loss = 3.1722e-03, PNorm = 152.4923, GNorm = 0.2958, lr_0 = 2.3309e-04
Loss = 3.3358e-03, PNorm = 152.4986, GNorm = 0.1017, lr_0 = 2.3293e-04
Loss = 3.1241e-03, PNorm = 152.5046, GNorm = 0.1090, lr_0 = 2.3277e-04
Loss = 2.2159e-03, PNorm = 152.5116, GNorm = 0.0565, lr_0 = 2.3261e-04
Loss = 2.9036e-03, PNorm = 152.5179, GNorm = 0.1338, lr_0 = 2.3246e-04
Loss = 4.0959e-03, PNorm = 152.5255, GNorm = 0.1026, lr_0 = 2.3230e-04
Loss = 2.1931e-03, PNorm = 152.5343, GNorm = 0.1117, lr_0 = 2.3214e-04
Loss = 2.0817e-03, PNorm = 152.5412, GNorm = 0.1904, lr_0 = 2.3198e-04
Loss = 3.8086e-03, PNorm = 152.5454, GNorm = 0.0824, lr_0 = 2.3182e-04
Loss = 3.5126e-03, PNorm = 152.5481, GNorm = 0.1671, lr_0 = 2.3166e-04
Loss = 7.3832e-03, PNorm = 152.5532, GNorm = 0.4796, lr_0 = 2.3150e-04
Loss = 2.6090e-03, PNorm = 152.5560, GNorm = 0.1145, lr_0 = 2.3134e-04
Loss = 3.5078e-03, PNorm = 152.5603, GNorm = 0.1126, lr_0 = 2.3118e-04
Loss = 2.7639e-03, PNorm = 152.5700, GNorm = 0.3855, lr_0 = 2.3103e-04
Loss = 2.1732e-03, PNorm = 152.5778, GNorm = 0.2044, lr_0 = 2.3087e-04
Loss = 2.5594e-03, PNorm = 152.5823, GNorm = 0.1459, lr_0 = 2.3071e-04
Loss = 2.8103e-03, PNorm = 152.5869, GNorm = 0.0980, lr_0 = 2.3055e-04
Loss = 3.7328e-03, PNorm = 152.5909, GNorm = 0.0971, lr_0 = 2.3039e-04
Loss = 2.4466e-03, PNorm = 152.5967, GNorm = 0.1688, lr_0 = 2.3024e-04
Loss = 2.3517e-03, PNorm = 152.6023, GNorm = 0.1340, lr_0 = 2.3008e-04
Loss = 2.1337e-03, PNorm = 152.6085, GNorm = 0.1909, lr_0 = 2.2992e-04
Loss = 2.1260e-03, PNorm = 152.6172, GNorm = 0.0708, lr_0 = 2.2976e-04
Loss = 2.6653e-03, PNorm = 152.6220, GNorm = 0.2134, lr_0 = 2.2961e-04
Loss = 5.4081e-03, PNorm = 152.6280, GNorm = 0.2138, lr_0 = 2.2945e-04
Loss = 2.5837e-03, PNorm = 152.6326, GNorm = 0.1335, lr_0 = 2.2929e-04
Loss = 3.0630e-03, PNorm = 152.6381, GNorm = 0.5315, lr_0 = 2.2913e-04
Loss = 4.3039e-03, PNorm = 152.6435, GNorm = 0.1542, lr_0 = 2.2898e-04
Loss = 3.2475e-03, PNorm = 152.6537, GNorm = 0.1258, lr_0 = 2.2882e-04
Loss = 4.3350e-03, PNorm = 152.6578, GNorm = 0.2359, lr_0 = 2.2866e-04
Loss = 6.4567e-03, PNorm = 152.6628, GNorm = 0.2698, lr_0 = 2.2851e-04
Loss = 2.2090e-03, PNorm = 152.6710, GNorm = 0.1597, lr_0 = 2.2835e-04
Loss = 4.4622e-03, PNorm = 152.6776, GNorm = 0.1464, lr_0 = 2.2819e-04
Loss = 3.7425e-03, PNorm = 152.6814, GNorm = 0.3154, lr_0 = 2.2804e-04
Loss = 2.2910e-03, PNorm = 152.6870, GNorm = 0.1414, lr_0 = 2.2788e-04
Loss = 5.3906e-03, PNorm = 152.6936, GNorm = 0.1156, lr_0 = 2.2773e-04
Loss = 4.6890e-03, PNorm = 152.6989, GNorm = 0.3601, lr_0 = 2.2757e-04
Validation mae = 0.476046
Epoch 20
Loss = 3.1848e-03, PNorm = 152.7045, GNorm = 0.1518, lr_0 = 2.2741e-04
Loss = 3.0901e-03, PNorm = 152.7102, GNorm = 0.2198, lr_0 = 2.2726e-04
Loss = 3.3819e-03, PNorm = 152.7166, GNorm = 0.1666, lr_0 = 2.2710e-04
Loss = 3.0968e-03, PNorm = 152.7239, GNorm = 0.1808, lr_0 = 2.2695e-04
Loss = 1.9400e-03, PNorm = 152.7273, GNorm = 0.1526, lr_0 = 2.2679e-04
Loss = 3.1462e-03, PNorm = 152.7293, GNorm = 0.1644, lr_0 = 2.2664e-04
Loss = 4.4782e-03, PNorm = 152.7340, GNorm = 0.3098, lr_0 = 2.2648e-04
Loss = 3.0957e-03, PNorm = 152.7372, GNorm = 0.2479, lr_0 = 2.2632e-04
Loss = 2.5242e-03, PNorm = 152.7411, GNorm = 0.0705, lr_0 = 2.2617e-04
Loss = 2.3236e-03, PNorm = 152.7440, GNorm = 0.0589, lr_0 = 2.2601e-04
Loss = 4.4960e-03, PNorm = 152.7472, GNorm = 0.3008, lr_0 = 2.2586e-04
Loss = 2.9249e-03, PNorm = 152.7500, GNorm = 0.1512, lr_0 = 2.2571e-04
Loss = 2.4902e-03, PNorm = 152.7553, GNorm = 0.2618, lr_0 = 2.2555e-04
Loss = 2.0979e-03, PNorm = 152.7601, GNorm = 0.1755, lr_0 = 2.2540e-04
Loss = 2.1079e-03, PNorm = 152.7641, GNorm = 0.1065, lr_0 = 2.2524e-04
Loss = 2.5636e-03, PNorm = 152.7697, GNorm = 0.2728, lr_0 = 2.2509e-04
Loss = 2.0975e-03, PNorm = 152.7741, GNorm = 0.1260, lr_0 = 2.2493e-04
Loss = 2.1449e-03, PNorm = 152.7761, GNorm = 0.1454, lr_0 = 2.2478e-04
Loss = 2.1662e-03, PNorm = 152.7812, GNorm = 0.1615, lr_0 = 2.2463e-04
Loss = 2.3600e-03, PNorm = 152.7865, GNorm = 0.0643, lr_0 = 2.2447e-04
Loss = 1.7655e-03, PNorm = 152.7921, GNorm = 0.2641, lr_0 = 2.2432e-04
Loss = 2.4142e-03, PNorm = 152.7972, GNorm = 0.1995, lr_0 = 2.2416e-04
Loss = 2.3137e-03, PNorm = 152.8031, GNorm = 0.0840, lr_0 = 2.2401e-04
Loss = 2.3695e-03, PNorm = 152.8083, GNorm = 0.0600, lr_0 = 2.2386e-04
Loss = 1.9581e-03, PNorm = 152.8121, GNorm = 0.1607, lr_0 = 2.2370e-04
Loss = 3.4991e-03, PNorm = 152.8193, GNorm = 0.1169, lr_0 = 2.2355e-04
Loss = 2.0434e-03, PNorm = 152.8229, GNorm = 0.1163, lr_0 = 2.2340e-04
Loss = 2.8844e-03, PNorm = 152.8244, GNorm = 0.2103, lr_0 = 2.2324e-04
Loss = 2.4668e-03, PNorm = 152.8249, GNorm = 0.0496, lr_0 = 2.2309e-04
Loss = 1.9500e-03, PNorm = 152.8287, GNorm = 0.1353, lr_0 = 2.2294e-04
Loss = 2.0559e-03, PNorm = 152.8333, GNorm = 0.1164, lr_0 = 2.2279e-04
Loss = 2.9956e-03, PNorm = 152.8375, GNorm = 0.0549, lr_0 = 2.2263e-04
Loss = 1.7964e-03, PNorm = 152.8433, GNorm = 0.0901, lr_0 = 2.2248e-04
Loss = 2.7663e-03, PNorm = 152.8486, GNorm = 0.0825, lr_0 = 2.2233e-04
Loss = 1.8526e-03, PNorm = 152.8548, GNorm = 0.2976, lr_0 = 2.2218e-04
Loss = 2.5925e-03, PNorm = 152.8596, GNorm = 0.1160, lr_0 = 2.2202e-04
Loss = 2.6662e-03, PNorm = 152.8636, GNorm = 0.0993, lr_0 = 2.2187e-04
Loss = 2.5916e-03, PNorm = 152.8676, GNorm = 0.1050, lr_0 = 2.2172e-04
Loss = 2.0691e-03, PNorm = 152.8693, GNorm = 0.2518, lr_0 = 2.2157e-04
Loss = 1.9034e-03, PNorm = 152.8726, GNorm = 0.0417, lr_0 = 2.2142e-04
Loss = 2.1377e-03, PNorm = 152.8777, GNorm = 0.1380, lr_0 = 2.2126e-04
Loss = 1.7629e-03, PNorm = 152.8826, GNorm = 0.1645, lr_0 = 2.2111e-04
Loss = 2.4209e-03, PNorm = 152.8866, GNorm = 0.0986, lr_0 = 2.2096e-04
Loss = 2.1711e-03, PNorm = 152.8888, GNorm = 0.0395, lr_0 = 2.2081e-04
Loss = 3.9124e-03, PNorm = 152.8939, GNorm = 0.2791, lr_0 = 2.2066e-04
Loss = 2.5327e-03, PNorm = 152.8997, GNorm = 0.0903, lr_0 = 2.2051e-04
Loss = 1.9967e-03, PNorm = 152.9030, GNorm = 0.1788, lr_0 = 2.2036e-04
Loss = 2.0814e-03, PNorm = 152.9083, GNorm = 0.1688, lr_0 = 2.2021e-04
Loss = 2.2934e-03, PNorm = 152.9114, GNorm = 0.1016, lr_0 = 2.2005e-04
Loss = 1.7905e-03, PNorm = 152.9144, GNorm = 0.0670, lr_0 = 2.1990e-04
Loss = 2.3335e-03, PNorm = 152.9194, GNorm = 0.1397, lr_0 = 2.1975e-04
Loss = 2.3383e-03, PNorm = 152.9257, GNorm = 0.1899, lr_0 = 2.1960e-04
Loss = 2.7396e-03, PNorm = 152.9334, GNorm = 0.2233, lr_0 = 2.1945e-04
Loss = 2.1105e-03, PNorm = 152.9398, GNorm = 0.1089, lr_0 = 2.1930e-04
Loss = 6.4302e-03, PNorm = 152.9448, GNorm = 0.1606, lr_0 = 2.1915e-04
Loss = 2.8190e-03, PNorm = 152.9486, GNorm = 0.0922, lr_0 = 2.1900e-04
Loss = 3.0244e-03, PNorm = 152.9514, GNorm = 0.2381, lr_0 = 2.1885e-04
Loss = 2.3476e-03, PNorm = 152.9543, GNorm = 0.1322, lr_0 = 2.1870e-04
Loss = 3.6239e-03, PNorm = 152.9586, GNorm = 0.2047, lr_0 = 2.1855e-04
Loss = 2.7200e-03, PNorm = 152.9651, GNorm = 0.1858, lr_0 = 2.1840e-04
Loss = 2.9519e-03, PNorm = 152.9689, GNorm = 0.1275, lr_0 = 2.1825e-04
Loss = 5.3218e-03, PNorm = 152.9764, GNorm = 0.2937, lr_0 = 2.1810e-04
Loss = 3.4738e-03, PNorm = 152.9821, GNorm = 0.1194, lr_0 = 2.1795e-04
Loss = 2.4296e-03, PNorm = 152.9871, GNorm = 0.1731, lr_0 = 2.1780e-04
Loss = 1.8523e-03, PNorm = 152.9935, GNorm = 0.2034, lr_0 = 2.1765e-04
Loss = 2.0862e-03, PNorm = 152.9984, GNorm = 0.1496, lr_0 = 2.1751e-04
Loss = 2.3433e-03, PNorm = 153.0029, GNorm = 0.1141, lr_0 = 2.1736e-04
Loss = 2.3169e-03, PNorm = 153.0079, GNorm = 0.0619, lr_0 = 2.1721e-04
Loss = 1.8216e-03, PNorm = 153.0133, GNorm = 0.0901, lr_0 = 2.1706e-04
Loss = 1.7975e-03, PNorm = 153.0173, GNorm = 0.1612, lr_0 = 2.1691e-04
Loss = 2.9204e-03, PNorm = 153.0230, GNorm = 0.0935, lr_0 = 2.1676e-04
Loss = 3.1498e-03, PNorm = 153.0296, GNorm = 0.1798, lr_0 = 2.1661e-04
Loss = 2.7936e-03, PNorm = 153.0353, GNorm = 0.1155, lr_0 = 2.1646e-04
Loss = 2.5467e-03, PNorm = 153.0390, GNorm = 0.0638, lr_0 = 2.1632e-04
Loss = 4.0302e-03, PNorm = 153.0424, GNorm = 0.1623, lr_0 = 2.1617e-04
Loss = 2.4338e-03, PNorm = 153.0492, GNorm = 0.2532, lr_0 = 2.1602e-04
Loss = 3.0615e-03, PNorm = 153.0543, GNorm = 0.0685, lr_0 = 2.1587e-04
Loss = 4.2044e-03, PNorm = 153.0620, GNorm = 0.1505, lr_0 = 2.1572e-04
Loss = 4.1192e-03, PNorm = 153.0677, GNorm = 0.1512, lr_0 = 2.1558e-04
Loss = 1.7910e-03, PNorm = 153.0734, GNorm = 0.0443, lr_0 = 2.1543e-04
Loss = 1.7333e-03, PNorm = 153.0816, GNorm = 0.1683, lr_0 = 2.1528e-04
Loss = 3.0364e-03, PNorm = 153.0861, GNorm = 0.1156, lr_0 = 2.1513e-04
Loss = 1.9600e-03, PNorm = 153.0896, GNorm = 0.0849, lr_0 = 2.1499e-04
Loss = 1.8813e-03, PNorm = 153.0952, GNorm = 0.0712, lr_0 = 2.1484e-04
Loss = 4.2327e-03, PNorm = 153.1015, GNorm = 0.4154, lr_0 = 2.1469e-04
Loss = 2.5920e-03, PNorm = 153.1033, GNorm = 0.1864, lr_0 = 2.1454e-04
Loss = 2.5337e-03, PNorm = 153.1075, GNorm = 0.1426, lr_0 = 2.1440e-04
Loss = 1.8166e-03, PNorm = 153.1123, GNorm = 0.1015, lr_0 = 2.1425e-04
Loss = 1.9084e-03, PNorm = 153.1182, GNorm = 0.1264, lr_0 = 2.1410e-04
Loss = 2.1243e-03, PNorm = 153.1202, GNorm = 0.1675, lr_0 = 2.1396e-04
Loss = 2.8509e-03, PNorm = 153.1260, GNorm = 0.0916, lr_0 = 2.1381e-04
Loss = 2.1273e-03, PNorm = 153.1312, GNorm = 0.1404, lr_0 = 2.1366e-04
Loss = 2.4041e-03, PNorm = 153.1352, GNorm = 0.0884, lr_0 = 2.1352e-04
Loss = 3.1683e-03, PNorm = 153.1397, GNorm = 0.1822, lr_0 = 2.1337e-04
Loss = 3.5669e-03, PNorm = 153.1441, GNorm = 0.2062, lr_0 = 2.1323e-04
Loss = 3.2708e-03, PNorm = 153.1494, GNorm = 0.1653, lr_0 = 2.1308e-04
Loss = 4.0034e-03, PNorm = 153.1537, GNorm = 0.4951, lr_0 = 2.1293e-04
Loss = 3.0716e-03, PNorm = 153.1573, GNorm = 0.1575, lr_0 = 2.1279e-04
Loss = 2.3076e-03, PNorm = 153.1643, GNorm = 0.1640, lr_0 = 2.1264e-04
Loss = 2.8662e-03, PNorm = 153.1679, GNorm = 0.1145, lr_0 = 2.1250e-04
Loss = 2.8450e-03, PNorm = 153.1706, GNorm = 0.1813, lr_0 = 2.1235e-04
Loss = 2.5328e-03, PNorm = 153.1769, GNorm = 0.1290, lr_0 = 2.1221e-04
Loss = 2.1372e-03, PNorm = 153.1818, GNorm = 0.2096, lr_0 = 2.1206e-04
Loss = 3.3838e-03, PNorm = 153.1879, GNorm = 0.2225, lr_0 = 2.1191e-04
Loss = 3.7122e-03, PNorm = 153.1935, GNorm = 0.1815, lr_0 = 2.1177e-04
Loss = 2.5281e-03, PNorm = 153.1967, GNorm = 0.2977, lr_0 = 2.1162e-04
Loss = 5.1080e-03, PNorm = 153.2027, GNorm = 0.1814, lr_0 = 2.1148e-04
Loss = 2.4915e-03, PNorm = 153.2065, GNorm = 0.1983, lr_0 = 2.1133e-04
Loss = 2.5017e-03, PNorm = 153.2131, GNorm = 0.2105, lr_0 = 2.1119e-04
Loss = 2.5078e-03, PNorm = 153.2183, GNorm = 0.1734, lr_0 = 2.1104e-04
Loss = 6.2509e-03, PNorm = 153.2214, GNorm = 0.0918, lr_0 = 2.1090e-04
Loss = 1.8761e-03, PNorm = 153.2227, GNorm = 0.1256, lr_0 = 2.1076e-04
Loss = 3.8725e-03, PNorm = 153.2289, GNorm = 0.1593, lr_0 = 2.1061e-04
Loss = 1.7950e-03, PNorm = 153.2323, GNorm = 0.0630, lr_0 = 2.1047e-04
Loss = 2.3764e-03, PNorm = 153.2394, GNorm = 0.1188, lr_0 = 2.1032e-04
Loss = 2.6976e-03, PNorm = 153.2439, GNorm = 0.1551, lr_0 = 2.1018e-04
Loss = 2.1396e-03, PNorm = 153.2476, GNorm = 0.0959, lr_0 = 2.1003e-04
Loss = 1.6727e-03, PNorm = 153.2528, GNorm = 0.1503, lr_0 = 2.0989e-04
Loss = 2.5312e-03, PNorm = 153.2584, GNorm = 0.0995, lr_0 = 2.0975e-04
Loss = 2.8399e-03, PNorm = 153.2660, GNorm = 0.1525, lr_0 = 2.0960e-04
Validation mae = 0.475413
Epoch 21
Loss = 2.4815e-03, PNorm = 153.2713, GNorm = 0.4432, lr_0 = 2.0946e-04
Loss = 2.7115e-03, PNorm = 153.2762, GNorm = 0.1060, lr_0 = 2.0932e-04
Loss = 2.0930e-03, PNorm = 153.2779, GNorm = 0.0653, lr_0 = 2.0917e-04
Loss = 1.7860e-03, PNorm = 153.2796, GNorm = 0.1047, lr_0 = 2.0903e-04
Loss = 2.3197e-03, PNorm = 153.2826, GNorm = 0.1231, lr_0 = 2.0889e-04
Loss = 2.1139e-03, PNorm = 153.2859, GNorm = 0.0737, lr_0 = 2.0874e-04
Loss = 1.6937e-03, PNorm = 153.2908, GNorm = 0.1907, lr_0 = 2.0860e-04
Loss = 2.3004e-03, PNorm = 153.2943, GNorm = 0.0633, lr_0 = 2.0846e-04
Loss = 2.0040e-03, PNorm = 153.2972, GNorm = 0.1488, lr_0 = 2.0831e-04
Loss = 1.6418e-03, PNorm = 153.3032, GNorm = 0.1409, lr_0 = 2.0817e-04
Loss = 2.6828e-03, PNorm = 153.3084, GNorm = 0.1128, lr_0 = 2.0803e-04
Loss = 2.6341e-03, PNorm = 153.3134, GNorm = 0.1341, lr_0 = 2.0789e-04
Loss = 1.8320e-03, PNorm = 153.3181, GNorm = 0.2675, lr_0 = 2.0774e-04
Loss = 3.8674e-03, PNorm = 153.3212, GNorm = 0.0978, lr_0 = 2.0760e-04
Loss = 2.4137e-03, PNorm = 153.3251, GNorm = 0.2746, lr_0 = 2.0746e-04
Loss = 1.5997e-03, PNorm = 153.3267, GNorm = 0.0577, lr_0 = 2.0732e-04
Loss = 2.3892e-03, PNorm = 153.3293, GNorm = 0.0624, lr_0 = 2.0718e-04
Loss = 1.5573e-03, PNorm = 153.3331, GNorm = 0.2448, lr_0 = 2.0703e-04
Loss = 2.8572e-03, PNorm = 153.3369, GNorm = 0.2148, lr_0 = 2.0689e-04
Loss = 1.5838e-03, PNorm = 153.3406, GNorm = 0.1679, lr_0 = 2.0675e-04
Loss = 1.5936e-03, PNorm = 153.3468, GNorm = 0.1801, lr_0 = 2.0661e-04
Loss = 2.0315e-03, PNorm = 153.3506, GNorm = 0.1881, lr_0 = 2.0647e-04
Loss = 1.7939e-03, PNorm = 153.3557, GNorm = 0.0648, lr_0 = 2.0633e-04
Loss = 1.7443e-03, PNorm = 153.3607, GNorm = 0.2297, lr_0 = 2.0618e-04
Loss = 2.5969e-03, PNorm = 153.3629, GNorm = 0.1565, lr_0 = 2.0604e-04
Loss = 3.4323e-03, PNorm = 153.3659, GNorm = 0.2001, lr_0 = 2.0590e-04
Loss = 2.4191e-03, PNorm = 153.3678, GNorm = 0.0538, lr_0 = 2.0576e-04
Loss = 2.0166e-03, PNorm = 153.3701, GNorm = 0.0511, lr_0 = 2.0562e-04
Loss = 3.3008e-03, PNorm = 153.3756, GNorm = 0.1906, lr_0 = 2.0548e-04
Loss = 1.9336e-03, PNorm = 153.3777, GNorm = 0.2986, lr_0 = 2.0534e-04
Loss = 1.9154e-03, PNorm = 153.3814, GNorm = 0.1023, lr_0 = 2.0520e-04
Loss = 2.1560e-03, PNorm = 153.3857, GNorm = 0.1086, lr_0 = 2.0506e-04
Loss = 2.1576e-03, PNorm = 153.3872, GNorm = 0.1790, lr_0 = 2.0492e-04
Loss = 2.7047e-03, PNorm = 153.3909, GNorm = 0.0464, lr_0 = 2.0478e-04
Loss = 1.6500e-03, PNorm = 153.3958, GNorm = 0.1352, lr_0 = 2.0464e-04
Loss = 1.5234e-03, PNorm = 153.3988, GNorm = 0.1836, lr_0 = 2.0450e-04
Loss = 3.4046e-03, PNorm = 153.4051, GNorm = 0.0493, lr_0 = 2.0436e-04
Loss = 2.0648e-03, PNorm = 153.4079, GNorm = 0.0838, lr_0 = 2.0422e-04
Loss = 3.0283e-03, PNorm = 153.4122, GNorm = 0.1053, lr_0 = 2.0408e-04
Loss = 1.6352e-03, PNorm = 153.4152, GNorm = 0.2823, lr_0 = 2.0394e-04
Loss = 4.2977e-03, PNorm = 153.4182, GNorm = 0.0576, lr_0 = 2.0380e-04
Loss = 2.3639e-03, PNorm = 153.4237, GNorm = 0.0881, lr_0 = 2.0366e-04
Loss = 2.7080e-03, PNorm = 153.4287, GNorm = 0.1611, lr_0 = 2.0352e-04
Loss = 2.9624e-03, PNorm = 153.4292, GNorm = 0.2902, lr_0 = 2.0338e-04
Loss = 2.3385e-03, PNorm = 153.4313, GNorm = 0.2276, lr_0 = 2.0324e-04
Loss = 3.4916e-03, PNorm = 153.4348, GNorm = 0.2000, lr_0 = 2.0310e-04
Loss = 2.6564e-03, PNorm = 153.4401, GNorm = 0.0915, lr_0 = 2.0296e-04
Loss = 1.5205e-03, PNorm = 153.4421, GNorm = 0.1724, lr_0 = 2.0282e-04
Loss = 2.5524e-03, PNorm = 153.4450, GNorm = 0.1359, lr_0 = 2.0268e-04
Loss = 2.0434e-03, PNorm = 153.4477, GNorm = 0.1422, lr_0 = 2.0254e-04
Loss = 1.5787e-03, PNorm = 153.4487, GNorm = 0.0980, lr_0 = 2.0240e-04
Loss = 1.9475e-03, PNorm = 153.4539, GNorm = 0.3151, lr_0 = 2.0227e-04
Loss = 1.8692e-03, PNorm = 153.4584, GNorm = 0.0697, lr_0 = 2.0213e-04
Loss = 1.8386e-03, PNorm = 153.4636, GNorm = 0.1360, lr_0 = 2.0199e-04
Loss = 3.4340e-03, PNorm = 153.4671, GNorm = 0.1437, lr_0 = 2.0185e-04
Loss = 3.1019e-03, PNorm = 153.4706, GNorm = 0.0935, lr_0 = 2.0171e-04
Loss = 1.6095e-03, PNorm = 153.4747, GNorm = 0.2026, lr_0 = 2.0157e-04
Loss = 3.2598e-03, PNorm = 153.4778, GNorm = 0.1538, lr_0 = 2.0144e-04
Loss = 1.4586e-03, PNorm = 153.4823, GNorm = 0.0810, lr_0 = 2.0130e-04
Loss = 1.9278e-03, PNorm = 153.4873, GNorm = 0.1012, lr_0 = 2.0116e-04
Loss = 1.3897e-03, PNorm = 153.4919, GNorm = 0.0410, lr_0 = 2.0102e-04
Loss = 1.9086e-03, PNorm = 153.4946, GNorm = 0.1674, lr_0 = 2.0088e-04
Loss = 1.6099e-03, PNorm = 153.4978, GNorm = 0.0920, lr_0 = 2.0075e-04
Loss = 2.3337e-03, PNorm = 153.5008, GNorm = 0.1707, lr_0 = 2.0061e-04
Loss = 1.4683e-03, PNorm = 153.5038, GNorm = 0.1397, lr_0 = 2.0047e-04
Loss = 3.2500e-03, PNorm = 153.5092, GNorm = 0.1263, lr_0 = 2.0033e-04
Loss = 1.6182e-03, PNorm = 153.5127, GNorm = 0.0930, lr_0 = 2.0020e-04
Loss = 2.4538e-03, PNorm = 153.5170, GNorm = 0.1123, lr_0 = 2.0006e-04
Loss = 1.6468e-03, PNorm = 153.5214, GNorm = 0.1464, lr_0 = 1.9992e-04
Loss = 1.9600e-03, PNorm = 153.5285, GNorm = 0.1907, lr_0 = 1.9979e-04
Loss = 4.1952e-03, PNorm = 153.5322, GNorm = 0.2381, lr_0 = 1.9965e-04
Loss = 1.9575e-03, PNorm = 153.5353, GNorm = 0.1312, lr_0 = 1.9951e-04
Loss = 1.8524e-03, PNorm = 153.5387, GNorm = 0.1837, lr_0 = 1.9938e-04
Loss = 3.6335e-03, PNorm = 153.5429, GNorm = 0.1156, lr_0 = 1.9924e-04
Loss = 3.3419e-03, PNorm = 153.5470, GNorm = 0.1017, lr_0 = 1.9910e-04
Loss = 1.6758e-03, PNorm = 153.5530, GNorm = 0.0997, lr_0 = 1.9897e-04
Loss = 1.8771e-03, PNorm = 153.5567, GNorm = 0.2313, lr_0 = 1.9883e-04
Loss = 1.8999e-03, PNorm = 153.5631, GNorm = 0.0807, lr_0 = 1.9869e-04
Loss = 2.1506e-03, PNorm = 153.5666, GNorm = 0.1110, lr_0 = 1.9856e-04
Loss = 1.8124e-03, PNorm = 153.5708, GNorm = 0.2426, lr_0 = 1.9842e-04
Loss = 3.8112e-03, PNorm = 153.5756, GNorm = 0.1809, lr_0 = 1.9829e-04
Loss = 1.6165e-03, PNorm = 153.5794, GNorm = 0.1215, lr_0 = 1.9815e-04
Loss = 1.9384e-03, PNorm = 153.5836, GNorm = 0.4765, lr_0 = 1.9801e-04
Loss = 2.0265e-03, PNorm = 153.5892, GNorm = 0.0707, lr_0 = 1.9788e-04
Loss = 1.6433e-03, PNorm = 153.5956, GNorm = 0.0620, lr_0 = 1.9774e-04
Loss = 2.0264e-03, PNorm = 153.6004, GNorm = 0.1080, lr_0 = 1.9761e-04
Loss = 2.6281e-03, PNorm = 153.6052, GNorm = 0.2291, lr_0 = 1.9747e-04
Loss = 2.2524e-03, PNorm = 153.6094, GNorm = 0.0829, lr_0 = 1.9734e-04
Loss = 1.7480e-03, PNorm = 153.6127, GNorm = 0.1390, lr_0 = 1.9720e-04
Loss = 2.4285e-03, PNorm = 153.6166, GNorm = 0.0565, lr_0 = 1.9707e-04
Loss = 1.9878e-03, PNorm = 153.6184, GNorm = 0.1611, lr_0 = 1.9693e-04
Loss = 1.6300e-03, PNorm = 153.6209, GNorm = 0.1255, lr_0 = 1.9680e-04
Loss = 1.3761e-03, PNorm = 153.6243, GNorm = 0.0295, lr_0 = 1.9666e-04
Loss = 2.1148e-03, PNorm = 153.6289, GNorm = 0.0836, lr_0 = 1.9653e-04
Loss = 2.0764e-03, PNorm = 153.6358, GNorm = 0.1367, lr_0 = 1.9639e-04
Loss = 2.6904e-03, PNorm = 153.6424, GNorm = 0.1566, lr_0 = 1.9626e-04
Loss = 4.2449e-03, PNorm = 153.6437, GNorm = 0.1796, lr_0 = 1.9612e-04
Loss = 2.3371e-03, PNorm = 153.6476, GNorm = 0.1495, lr_0 = 1.9599e-04
Loss = 2.3534e-03, PNorm = 153.6532, GNorm = 0.1373, lr_0 = 1.9585e-04
Loss = 6.1851e-03, PNorm = 153.6538, GNorm = 0.1528, lr_0 = 1.9572e-04
Loss = 1.8136e-03, PNorm = 153.6566, GNorm = 0.2364, lr_0 = 1.9559e-04
Loss = 2.5720e-03, PNorm = 153.6632, GNorm = 0.2511, lr_0 = 1.9545e-04
Loss = 2.1527e-03, PNorm = 153.6683, GNorm = 0.1995, lr_0 = 1.9532e-04
Loss = 3.0746e-03, PNorm = 153.6777, GNorm = 0.1829, lr_0 = 1.9518e-04
Loss = 1.6786e-03, PNorm = 153.6860, GNorm = 0.1805, lr_0 = 1.9505e-04
Loss = 2.3874e-03, PNorm = 153.6897, GNorm = 0.1736, lr_0 = 1.9492e-04
Loss = 3.6730e-03, PNorm = 153.6947, GNorm = 0.0539, lr_0 = 1.9478e-04
Loss = 3.1434e-03, PNorm = 153.7015, GNorm = 0.0808, lr_0 = 1.9465e-04
Loss = 2.4476e-03, PNorm = 153.7095, GNorm = 0.1350, lr_0 = 1.9452e-04
Loss = 2.8141e-03, PNorm = 153.7149, GNorm = 0.1242, lr_0 = 1.9438e-04
Loss = 1.9029e-03, PNorm = 153.7191, GNorm = 0.1027, lr_0 = 1.9425e-04
Loss = 1.6983e-03, PNorm = 153.7232, GNorm = 0.0816, lr_0 = 1.9412e-04
Loss = 2.0710e-03, PNorm = 153.7256, GNorm = 0.1626, lr_0 = 1.9398e-04
Loss = 1.7255e-03, PNorm = 153.7279, GNorm = 0.0584, lr_0 = 1.9385e-04
Loss = 3.4226e-03, PNorm = 153.7302, GNorm = 0.4308, lr_0 = 1.9372e-04
Loss = 2.5682e-03, PNorm = 153.7321, GNorm = 0.0911, lr_0 = 1.9359e-04
Loss = 1.9596e-03, PNorm = 153.7384, GNorm = 0.1559, lr_0 = 1.9345e-04
Loss = 3.5803e-03, PNorm = 153.7437, GNorm = 0.1287, lr_0 = 1.9332e-04
Loss = 1.9989e-03, PNorm = 153.7459, GNorm = 0.1010, lr_0 = 1.9319e-04
Loss = 4.3531e-03, PNorm = 153.7491, GNorm = 0.0798, lr_0 = 1.9306e-04
Validation mae = 0.476673
Epoch 22
Loss = 1.6645e-03, PNorm = 153.7540, GNorm = 0.1659, lr_0 = 1.9292e-04
Loss = 1.7743e-03, PNorm = 153.7558, GNorm = 0.2144, lr_0 = 1.9279e-04
Loss = 2.2196e-03, PNorm = 153.7610, GNorm = 0.2097, lr_0 = 1.9266e-04
Loss = 1.7444e-03, PNorm = 153.7641, GNorm = 0.1985, lr_0 = 1.9253e-04
Loss = 2.4157e-03, PNorm = 153.7682, GNorm = 0.2065, lr_0 = 1.9240e-04
Loss = 2.6818e-03, PNorm = 153.7729, GNorm = 0.0839, lr_0 = 1.9226e-04
Loss = 1.5445e-03, PNorm = 153.7757, GNorm = 0.1376, lr_0 = 1.9213e-04
Loss = 2.0170e-03, PNorm = 153.7801, GNorm = 0.1655, lr_0 = 1.9200e-04
Loss = 1.7550e-03, PNorm = 153.7830, GNorm = 0.0656, lr_0 = 1.9187e-04
Loss = 1.7016e-03, PNorm = 153.7838, GNorm = 0.0982, lr_0 = 1.9174e-04
Loss = 1.6973e-03, PNorm = 153.7863, GNorm = 0.1300, lr_0 = 1.9161e-04
Loss = 2.9239e-03, PNorm = 153.7921, GNorm = 0.2243, lr_0 = 1.9148e-04
Loss = 1.2679e-03, PNorm = 153.7957, GNorm = 0.0870, lr_0 = 1.9134e-04
Loss = 1.5763e-03, PNorm = 153.7987, GNorm = 0.1522, lr_0 = 1.9121e-04
Loss = 1.3877e-03, PNorm = 153.8021, GNorm = 0.0567, lr_0 = 1.9108e-04
Loss = 1.4082e-03, PNorm = 153.8062, GNorm = 0.2622, lr_0 = 1.9095e-04
Loss = 2.3222e-03, PNorm = 153.8096, GNorm = 0.0756, lr_0 = 1.9082e-04
Loss = 1.4001e-03, PNorm = 153.8109, GNorm = 0.1948, lr_0 = 1.9069e-04
Loss = 1.8307e-03, PNorm = 153.8138, GNorm = 0.1233, lr_0 = 1.9056e-04
Loss = 2.6776e-03, PNorm = 153.8175, GNorm = 0.1005, lr_0 = 1.9043e-04
Loss = 1.5201e-03, PNorm = 153.8230, GNorm = 0.1875, lr_0 = 1.9030e-04
Loss = 2.1360e-03, PNorm = 153.8273, GNorm = 0.1055, lr_0 = 1.9017e-04
Loss = 1.5327e-03, PNorm = 153.8301, GNorm = 0.1413, lr_0 = 1.9004e-04
Loss = 2.0727e-03, PNorm = 153.8333, GNorm = 0.0832, lr_0 = 1.8991e-04
Loss = 1.5443e-03, PNorm = 153.8349, GNorm = 0.0571, lr_0 = 1.8978e-04
Loss = 3.0728e-03, PNorm = 153.8375, GNorm = 0.0600, lr_0 = 1.8965e-04
Loss = 1.4379e-03, PNorm = 153.8432, GNorm = 0.1895, lr_0 = 1.8952e-04
Loss = 1.4864e-03, PNorm = 153.8464, GNorm = 0.1281, lr_0 = 1.8939e-04
Loss = 1.9846e-03, PNorm = 153.8491, GNorm = 0.0765, lr_0 = 1.8926e-04
Loss = 1.4023e-03, PNorm = 153.8499, GNorm = 0.1766, lr_0 = 1.8913e-04
Loss = 2.0982e-03, PNorm = 153.8515, GNorm = 0.1202, lr_0 = 1.8900e-04
Loss = 3.0313e-03, PNorm = 153.8541, GNorm = 0.2452, lr_0 = 1.8887e-04
Loss = 1.7604e-03, PNorm = 153.8562, GNorm = 0.0874, lr_0 = 1.8874e-04
Loss = 1.4180e-03, PNorm = 153.8590, GNorm = 0.0717, lr_0 = 1.8861e-04
Loss = 2.4325e-03, PNorm = 153.8639, GNorm = 0.0577, lr_0 = 1.8848e-04
Loss = 1.6035e-03, PNorm = 153.8694, GNorm = 0.1395, lr_0 = 1.8835e-04
Loss = 1.6965e-03, PNorm = 153.8729, GNorm = 0.0364, lr_0 = 1.8822e-04
Loss = 2.1579e-03, PNorm = 153.8781, GNorm = 0.0343, lr_0 = 1.8809e-04
Loss = 3.3844e-03, PNorm = 153.8825, GNorm = 0.1064, lr_0 = 1.8797e-04
Loss = 1.3754e-03, PNorm = 153.8867, GNorm = 0.0667, lr_0 = 1.8784e-04
Loss = 3.3847e-03, PNorm = 153.8908, GNorm = 0.0642, lr_0 = 1.8771e-04
Loss = 1.6420e-03, PNorm = 153.8923, GNorm = 0.2861, lr_0 = 1.8758e-04
Loss = 1.4719e-03, PNorm = 153.8971, GNorm = 0.0883, lr_0 = 1.8745e-04
Loss = 4.0249e-03, PNorm = 153.8996, GNorm = 0.0675, lr_0 = 1.8732e-04
Loss = 3.2802e-03, PNorm = 153.9029, GNorm = 0.1527, lr_0 = 1.8719e-04
Loss = 1.3412e-03, PNorm = 153.9078, GNorm = 0.1405, lr_0 = 1.8707e-04
Loss = 1.6394e-03, PNorm = 153.9120, GNorm = 0.1526, lr_0 = 1.8694e-04
Loss = 3.3531e-03, PNorm = 153.9159, GNorm = 0.1302, lr_0 = 1.8681e-04
Loss = 2.2097e-03, PNorm = 153.9172, GNorm = 0.1196, lr_0 = 1.8668e-04
Loss = 1.6715e-03, PNorm = 153.9192, GNorm = 0.1092, lr_0 = 1.8655e-04
Loss = 1.4698e-03, PNorm = 153.9199, GNorm = 0.1612, lr_0 = 1.8643e-04
Loss = 1.9770e-03, PNorm = 153.9241, GNorm = 0.2054, lr_0 = 1.8630e-04
Loss = 1.8436e-03, PNorm = 153.9292, GNorm = 0.1720, lr_0 = 1.8617e-04
Loss = 1.3628e-03, PNorm = 153.9338, GNorm = 0.1568, lr_0 = 1.8604e-04
Loss = 1.5602e-03, PNorm = 153.9377, GNorm = 0.1177, lr_0 = 1.8592e-04
Loss = 5.2921e-03, PNorm = 153.9388, GNorm = 0.1846, lr_0 = 1.8579e-04
Loss = 3.4635e-03, PNorm = 153.9430, GNorm = 0.1412, lr_0 = 1.8566e-04
Loss = 2.5252e-03, PNorm = 153.9450, GNorm = 0.1077, lr_0 = 1.8553e-04
Loss = 3.6473e-03, PNorm = 153.9515, GNorm = 0.1170, lr_0 = 1.8541e-04
Loss = 2.2113e-03, PNorm = 153.9543, GNorm = 0.1095, lr_0 = 1.8528e-04
Loss = 1.8281e-03, PNorm = 153.9550, GNorm = 0.0845, lr_0 = 1.8515e-04
Loss = 1.4532e-03, PNorm = 153.9592, GNorm = 0.1722, lr_0 = 1.8503e-04
Loss = 1.3232e-03, PNorm = 153.9643, GNorm = 0.1205, lr_0 = 1.8490e-04
Loss = 1.9540e-03, PNorm = 153.9677, GNorm = 0.3202, lr_0 = 1.8477e-04
Loss = 3.0828e-03, PNorm = 153.9692, GNorm = 0.2192, lr_0 = 1.8465e-04
Loss = 2.2344e-03, PNorm = 153.9739, GNorm = 0.1483, lr_0 = 1.8452e-04
Loss = 2.0780e-03, PNorm = 153.9783, GNorm = 0.1820, lr_0 = 1.8439e-04
Loss = 1.3778e-03, PNorm = 153.9822, GNorm = 0.2823, lr_0 = 1.8427e-04
Loss = 2.6120e-03, PNorm = 153.9841, GNorm = 0.1061, lr_0 = 1.8414e-04
Loss = 2.0575e-03, PNorm = 153.9872, GNorm = 0.1481, lr_0 = 1.8401e-04
Loss = 2.4677e-03, PNorm = 153.9889, GNorm = 0.0838, lr_0 = 1.8389e-04
Loss = 1.6710e-03, PNorm = 153.9917, GNorm = 0.0454, lr_0 = 1.8376e-04
Loss = 1.2720e-03, PNorm = 153.9965, GNorm = 0.1924, lr_0 = 1.8364e-04
Loss = 1.8557e-03, PNorm = 154.0009, GNorm = 0.0606, lr_0 = 1.8351e-04
Loss = 4.0529e-03, PNorm = 154.0058, GNorm = 0.2857, lr_0 = 1.8338e-04
Loss = 1.6876e-03, PNorm = 154.0119, GNorm = 0.0456, lr_0 = 1.8326e-04
Loss = 2.0608e-03, PNorm = 154.0158, GNorm = 0.1159, lr_0 = 1.8313e-04
Loss = 1.6510e-03, PNorm = 154.0208, GNorm = 0.1902, lr_0 = 1.8301e-04
Loss = 2.1884e-03, PNorm = 154.0258, GNorm = 0.2315, lr_0 = 1.8288e-04
Loss = 1.8771e-03, PNorm = 154.0267, GNorm = 0.0671, lr_0 = 1.8276e-04
Loss = 2.4919e-03, PNorm = 154.0290, GNorm = 0.0853, lr_0 = 1.8263e-04
Loss = 1.2849e-03, PNorm = 154.0297, GNorm = 0.0421, lr_0 = 1.8251e-04
Loss = 2.0473e-03, PNorm = 154.0340, GNorm = 0.1226, lr_0 = 1.8238e-04
Loss = 1.6368e-03, PNorm = 154.0371, GNorm = 0.0809, lr_0 = 1.8226e-04
Loss = 1.4393e-03, PNorm = 154.0417, GNorm = 0.0675, lr_0 = 1.8213e-04
Loss = 1.3569e-03, PNorm = 154.0471, GNorm = 0.0425, lr_0 = 1.8201e-04
Loss = 5.2359e-03, PNorm = 154.0532, GNorm = 0.0588, lr_0 = 1.8188e-04
Loss = 1.6004e-03, PNorm = 154.0584, GNorm = 0.1999, lr_0 = 1.8176e-04
Loss = 2.4213e-03, PNorm = 154.0602, GNorm = 0.1242, lr_0 = 1.8163e-04
Loss = 1.6892e-03, PNorm = 154.0638, GNorm = 0.1367, lr_0 = 1.8151e-04
Loss = 1.6770e-03, PNorm = 154.0656, GNorm = 0.1046, lr_0 = 1.8138e-04
Loss = 1.8890e-03, PNorm = 154.0694, GNorm = 0.1303, lr_0 = 1.8126e-04
Loss = 1.4687e-03, PNorm = 154.0719, GNorm = 0.1277, lr_0 = 1.8114e-04
Loss = 1.3823e-03, PNorm = 154.0759, GNorm = 0.1099, lr_0 = 1.8101e-04
Loss = 1.2066e-03, PNorm = 154.0809, GNorm = 0.0431, lr_0 = 1.8089e-04
Loss = 2.0276e-03, PNorm = 154.0832, GNorm = 0.1293, lr_0 = 1.8076e-04
Loss = 1.5621e-03, PNorm = 154.0858, GNorm = 0.0837, lr_0 = 1.8064e-04
Loss = 1.7416e-03, PNorm = 154.0880, GNorm = 0.0463, lr_0 = 1.8052e-04
Loss = 3.0018e-03, PNorm = 154.0902, GNorm = 0.0637, lr_0 = 1.8039e-04
Loss = 1.7611e-03, PNorm = 154.0929, GNorm = 0.1434, lr_0 = 1.8027e-04
Loss = 2.2759e-03, PNorm = 154.0970, GNorm = 0.0447, lr_0 = 1.8015e-04
Loss = 3.6273e-03, PNorm = 154.0992, GNorm = 0.1145, lr_0 = 1.8002e-04
Loss = 1.9836e-03, PNorm = 154.1034, GNorm = 0.1481, lr_0 = 1.7990e-04
Loss = 4.6565e-03, PNorm = 154.1093, GNorm = 0.4154, lr_0 = 1.7978e-04
Loss = 2.0482e-03, PNorm = 154.1156, GNorm = 0.1819, lr_0 = 1.7965e-04
Loss = 1.3546e-03, PNorm = 154.1187, GNorm = 0.1975, lr_0 = 1.7953e-04
Loss = 1.3960e-03, PNorm = 154.1226, GNorm = 0.1476, lr_0 = 1.7941e-04
Loss = 3.3902e-03, PNorm = 154.1271, GNorm = 0.0588, lr_0 = 1.7928e-04
Loss = 1.2787e-03, PNorm = 154.1300, GNorm = 0.0750, lr_0 = 1.7916e-04
Loss = 3.2380e-03, PNorm = 154.1323, GNorm = 0.0590, lr_0 = 1.7904e-04
Loss = 2.1910e-03, PNorm = 154.1338, GNorm = 0.1787, lr_0 = 1.7892e-04
Loss = 2.4791e-03, PNorm = 154.1351, GNorm = 0.1316, lr_0 = 1.7879e-04
Loss = 1.3888e-03, PNorm = 154.1369, GNorm = 0.0643, lr_0 = 1.7867e-04
Loss = 3.2387e-03, PNorm = 154.1390, GNorm = 0.5020, lr_0 = 1.7855e-04
Loss = 1.1484e-03, PNorm = 154.1421, GNorm = 0.1320, lr_0 = 1.7843e-04
Loss = 2.0026e-03, PNorm = 154.1462, GNorm = 0.1169, lr_0 = 1.7830e-04
Loss = 3.2705e-03, PNorm = 154.1485, GNorm = 0.1271, lr_0 = 1.7818e-04
Loss = 2.0573e-03, PNorm = 154.1514, GNorm = 0.1793, lr_0 = 1.7806e-04
Loss = 2.1579e-03, PNorm = 154.1531, GNorm = 0.2967, lr_0 = 1.7794e-04
Loss = 1.4333e-03, PNorm = 154.1571, GNorm = 0.0414, lr_0 = 1.7782e-04
Validation mae = 0.475466
Epoch 23
Loss = 1.3125e-03, PNorm = 154.1572, GNorm = 0.1588, lr_0 = 1.7769e-04
Loss = 2.1063e-03, PNorm = 154.1582, GNorm = 0.0913, lr_0 = 1.7757e-04
Loss = 1.2862e-03, PNorm = 154.1594, GNorm = 0.1422, lr_0 = 1.7745e-04
Loss = 1.7177e-03, PNorm = 154.1616, GNorm = 0.1100, lr_0 = 1.7733e-04
Loss = 1.5097e-03, PNorm = 154.1637, GNorm = 0.1301, lr_0 = 1.7721e-04
Loss = 1.7502e-03, PNorm = 154.1649, GNorm = 0.0273, lr_0 = 1.7709e-04
Loss = 1.1747e-03, PNorm = 154.1677, GNorm = 0.0721, lr_0 = 1.7696e-04
Loss = 1.9467e-03, PNorm = 154.1718, GNorm = 0.0690, lr_0 = 1.7684e-04
Loss = 3.5768e-03, PNorm = 154.1744, GNorm = 0.0572, lr_0 = 1.7672e-04
Loss = 1.4334e-03, PNorm = 154.1773, GNorm = 0.0683, lr_0 = 1.7660e-04
Loss = 1.1842e-03, PNorm = 154.1808, GNorm = 0.1226, lr_0 = 1.7648e-04
Loss = 1.2038e-03, PNorm = 154.1845, GNorm = 0.0656, lr_0 = 1.7636e-04
Loss = 1.3658e-03, PNorm = 154.1878, GNorm = 0.0927, lr_0 = 1.7624e-04
Loss = 1.6007e-03, PNorm = 154.1903, GNorm = 0.0731, lr_0 = 1.7612e-04
Loss = 1.2377e-03, PNorm = 154.1911, GNorm = 0.1201, lr_0 = 1.7600e-04
Loss = 1.2327e-03, PNorm = 154.1923, GNorm = 0.0378, lr_0 = 1.7588e-04
Loss = 1.4698e-03, PNorm = 154.1932, GNorm = 0.1192, lr_0 = 1.7576e-04
Loss = 1.2034e-03, PNorm = 154.1959, GNorm = 0.1133, lr_0 = 1.7564e-04
Loss = 3.9694e-03, PNorm = 154.1976, GNorm = 0.1558, lr_0 = 1.7552e-04
Loss = 1.5621e-03, PNorm = 154.2012, GNorm = 0.0614, lr_0 = 1.7540e-04
Loss = 2.0502e-03, PNorm = 154.2057, GNorm = 0.4759, lr_0 = 1.7528e-04
Loss = 1.5869e-03, PNorm = 154.2089, GNorm = 0.0611, lr_0 = 1.7516e-04
Loss = 1.5205e-03, PNorm = 154.2116, GNorm = 0.1154, lr_0 = 1.7504e-04
Loss = 2.7978e-03, PNorm = 154.2142, GNorm = 0.0832, lr_0 = 1.7492e-04
Loss = 1.1946e-03, PNorm = 154.2172, GNorm = 0.1451, lr_0 = 1.7480e-04
Loss = 1.1558e-03, PNorm = 154.2209, GNorm = 0.1383, lr_0 = 1.7468e-04
Loss = 1.8950e-03, PNorm = 154.2240, GNorm = 0.0393, lr_0 = 1.7456e-04
Loss = 2.3751e-03, PNorm = 154.2278, GNorm = 0.2664, lr_0 = 1.7444e-04
Loss = 1.4692e-03, PNorm = 154.2289, GNorm = 0.1766, lr_0 = 1.7432e-04
Loss = 2.0946e-03, PNorm = 154.2299, GNorm = 0.0885, lr_0 = 1.7420e-04
Loss = 2.6989e-03, PNorm = 154.2338, GNorm = 0.1107, lr_0 = 1.7408e-04
Loss = 1.6476e-03, PNorm = 154.2384, GNorm = 0.0524, lr_0 = 1.7396e-04
Loss = 1.3306e-03, PNorm = 154.2417, GNorm = 0.0792, lr_0 = 1.7384e-04
Loss = 1.5571e-03, PNorm = 154.2439, GNorm = 0.1314, lr_0 = 1.7372e-04
Loss = 1.2280e-03, PNorm = 154.2457, GNorm = 0.0538, lr_0 = 1.7360e-04
Loss = 1.0197e-03, PNorm = 154.2474, GNorm = 0.0480, lr_0 = 1.7348e-04
Loss = 2.2170e-03, PNorm = 154.2487, GNorm = 0.1566, lr_0 = 1.7336e-04
Loss = 1.9918e-03, PNorm = 154.2526, GNorm = 0.1264, lr_0 = 1.7325e-04
Loss = 1.8686e-03, PNorm = 154.2543, GNorm = 0.0993, lr_0 = 1.7313e-04
Loss = 3.2464e-03, PNorm = 154.2587, GNorm = 0.1641, lr_0 = 1.7301e-04
Loss = 1.1684e-03, PNorm = 154.2627, GNorm = 0.0733, lr_0 = 1.7289e-04
Loss = 2.0430e-03, PNorm = 154.2667, GNorm = 0.1248, lr_0 = 1.7277e-04
Loss = 2.1511e-03, PNorm = 154.2704, GNorm = 0.0762, lr_0 = 1.7265e-04
Loss = 1.2167e-03, PNorm = 154.2737, GNorm = 0.0529, lr_0 = 1.7253e-04
Loss = 2.1353e-03, PNorm = 154.2756, GNorm = 0.1128, lr_0 = 1.7242e-04
Loss = 2.1332e-03, PNorm = 154.2785, GNorm = 0.1517, lr_0 = 1.7230e-04
Loss = 3.9727e-03, PNorm = 154.2806, GNorm = 0.1919, lr_0 = 1.7218e-04
Loss = 2.2493e-03, PNorm = 154.2851, GNorm = 0.2031, lr_0 = 1.7206e-04
Loss = 1.5734e-03, PNorm = 154.2905, GNorm = 0.1558, lr_0 = 1.7194e-04
Loss = 1.2122e-03, PNorm = 154.2943, GNorm = 0.0448, lr_0 = 1.7183e-04
Loss = 1.4441e-03, PNorm = 154.2975, GNorm = 0.1420, lr_0 = 1.7171e-04
Loss = 2.7858e-03, PNorm = 154.2996, GNorm = 0.2102, lr_0 = 1.7159e-04
Loss = 4.5862e-03, PNorm = 154.3013, GNorm = 0.0919, lr_0 = 1.7147e-04
Loss = 2.6204e-03, PNorm = 154.3060, GNorm = 0.1012, lr_0 = 1.7136e-04
Loss = 1.4456e-03, PNorm = 154.3102, GNorm = 0.1571, lr_0 = 1.7124e-04
Loss = 2.3623e-03, PNorm = 154.3132, GNorm = 0.0814, lr_0 = 1.7112e-04
Loss = 1.7861e-03, PNorm = 154.3139, GNorm = 0.0712, lr_0 = 1.7100e-04
Loss = 2.3490e-03, PNorm = 154.3167, GNorm = 0.0690, lr_0 = 1.7089e-04
Loss = 2.0217e-03, PNorm = 154.3187, GNorm = 0.0627, lr_0 = 1.7077e-04
Loss = 4.7946e-03, PNorm = 154.3217, GNorm = 1.0758, lr_0 = 1.7065e-04
Loss = 2.1609e-03, PNorm = 154.3248, GNorm = 0.1034, lr_0 = 1.7054e-04
Loss = 1.5103e-03, PNorm = 154.3293, GNorm = 0.0499, lr_0 = 1.7042e-04
Loss = 1.2489e-03, PNorm = 154.3313, GNorm = 0.0927, lr_0 = 1.7030e-04
Loss = 1.7985e-03, PNorm = 154.3335, GNorm = 0.1017, lr_0 = 1.7019e-04
Loss = 1.0736e-03, PNorm = 154.3343, GNorm = 0.0579, lr_0 = 1.7007e-04
Loss = 2.1395e-03, PNorm = 154.3350, GNorm = 0.0245, lr_0 = 1.6995e-04
Loss = 2.8365e-03, PNorm = 154.3391, GNorm = 0.1420, lr_0 = 1.6984e-04
Loss = 1.5108e-03, PNorm = 154.3447, GNorm = 0.1420, lr_0 = 1.6972e-04
Loss = 1.9600e-03, PNorm = 154.3486, GNorm = 0.0597, lr_0 = 1.6960e-04
Loss = 2.7405e-03, PNorm = 154.3517, GNorm = 0.4091, lr_0 = 1.6949e-04
Loss = 1.4956e-03, PNorm = 154.3552, GNorm = 0.1134, lr_0 = 1.6937e-04
Loss = 1.9345e-03, PNorm = 154.3588, GNorm = 0.0958, lr_0 = 1.6926e-04
Loss = 1.5018e-03, PNorm = 154.3624, GNorm = 0.1519, lr_0 = 1.6914e-04
Loss = 3.3167e-03, PNorm = 154.3677, GNorm = 0.0470, lr_0 = 1.6902e-04
Loss = 1.2068e-03, PNorm = 154.3728, GNorm = 0.0435, lr_0 = 1.6891e-04
Loss = 2.2894e-03, PNorm = 154.3756, GNorm = 0.1027, lr_0 = 1.6879e-04
Loss = 1.5523e-03, PNorm = 154.3789, GNorm = 0.0377, lr_0 = 1.6868e-04
Loss = 1.3712e-03, PNorm = 154.3832, GNorm = 0.0429, lr_0 = 1.6856e-04
Loss = 1.2215e-03, PNorm = 154.3860, GNorm = 0.0372, lr_0 = 1.6845e-04
Loss = 1.5573e-03, PNorm = 154.3876, GNorm = 0.0880, lr_0 = 1.6833e-04
Loss = 1.6523e-03, PNorm = 154.3906, GNorm = 0.1948, lr_0 = 1.6821e-04
Loss = 9.9306e-04, PNorm = 154.3934, GNorm = 0.0940, lr_0 = 1.6810e-04
Loss = 1.8964e-03, PNorm = 154.3975, GNorm = 0.3006, lr_0 = 1.6798e-04
Loss = 1.3029e-03, PNorm = 154.4008, GNorm = 0.0490, lr_0 = 1.6787e-04
Loss = 3.0741e-03, PNorm = 154.4041, GNorm = 0.1204, lr_0 = 1.6775e-04
Loss = 1.7768e-03, PNorm = 154.4057, GNorm = 0.0667, lr_0 = 1.6764e-04
Loss = 1.8007e-03, PNorm = 154.4092, GNorm = 0.0915, lr_0 = 1.6752e-04
Loss = 1.3926e-03, PNorm = 154.4123, GNorm = 0.1547, lr_0 = 1.6741e-04
Loss = 1.1304e-03, PNorm = 154.4141, GNorm = 0.0402, lr_0 = 1.6729e-04
Loss = 2.5086e-03, PNorm = 154.4177, GNorm = 0.0713, lr_0 = 1.6718e-04
Loss = 2.1290e-03, PNorm = 154.4196, GNorm = 0.1627, lr_0 = 1.6707e-04
Loss = 3.2005e-03, PNorm = 154.4217, GNorm = 0.1367, lr_0 = 1.6695e-04
Loss = 1.0340e-03, PNorm = 154.4227, GNorm = 0.0585, lr_0 = 1.6684e-04
Loss = 1.5381e-03, PNorm = 154.4255, GNorm = 0.1595, lr_0 = 1.6672e-04
Loss = 2.8784e-03, PNorm = 154.4290, GNorm = 0.1504, lr_0 = 1.6661e-04
Loss = 1.0650e-03, PNorm = 154.4318, GNorm = 0.0537, lr_0 = 1.6649e-04
Loss = 1.8678e-03, PNorm = 154.4363, GNorm = 0.2105, lr_0 = 1.6638e-04
Loss = 3.1288e-03, PNorm = 154.4402, GNorm = 0.1411, lr_0 = 1.6627e-04
Loss = 2.0844e-03, PNorm = 154.4432, GNorm = 0.0420, lr_0 = 1.6615e-04
Loss = 2.8872e-03, PNorm = 154.4463, GNorm = 0.1644, lr_0 = 1.6604e-04
Loss = 2.2310e-03, PNorm = 154.4508, GNorm = 0.0391, lr_0 = 1.6592e-04
Loss = 1.2162e-03, PNorm = 154.4525, GNorm = 0.0902, lr_0 = 1.6581e-04
Loss = 1.7893e-03, PNorm = 154.4547, GNorm = 0.0827, lr_0 = 1.6570e-04
Loss = 1.0347e-03, PNorm = 154.4557, GNorm = 0.1816, lr_0 = 1.6558e-04
Loss = 1.2083e-03, PNorm = 154.4569, GNorm = 0.0266, lr_0 = 1.6547e-04
Loss = 1.2507e-03, PNorm = 154.4581, GNorm = 0.1174, lr_0 = 1.6536e-04
Loss = 1.3090e-03, PNorm = 154.4632, GNorm = 0.0785, lr_0 = 1.6524e-04
Loss = 1.8299e-03, PNorm = 154.4673, GNorm = 0.0989, lr_0 = 1.6513e-04
Loss = 1.7167e-03, PNorm = 154.4696, GNorm = 0.0939, lr_0 = 1.6502e-04
Loss = 1.3616e-03, PNorm = 154.4706, GNorm = 0.1516, lr_0 = 1.6490e-04
Loss = 1.9890e-03, PNorm = 154.4725, GNorm = 0.3958, lr_0 = 1.6479e-04
Loss = 1.4341e-03, PNorm = 154.4763, GNorm = 0.0880, lr_0 = 1.6468e-04
Loss = 2.1927e-03, PNorm = 154.4804, GNorm = 0.0971, lr_0 = 1.6457e-04
Loss = 1.9315e-03, PNorm = 154.4845, GNorm = 0.0637, lr_0 = 1.6445e-04
Loss = 1.8998e-03, PNorm = 154.4867, GNorm = 0.1932, lr_0 = 1.6434e-04
Loss = 1.7389e-03, PNorm = 154.4893, GNorm = 0.0817, lr_0 = 1.6423e-04
Loss = 1.8310e-03, PNorm = 154.4923, GNorm = 0.0682, lr_0 = 1.6412e-04
Loss = 3.0834e-03, PNorm = 154.4943, GNorm = 0.0930, lr_0 = 1.6400e-04
Loss = 2.8225e-03, PNorm = 154.4971, GNorm = 0.1186, lr_0 = 1.6389e-04
Loss = 1.1324e-03, PNorm = 154.4996, GNorm = 0.1084, lr_0 = 1.6378e-04
Validation mae = 0.475129
Epoch 24
Loss = 1.7846e-03, PNorm = 154.5029, GNorm = 0.0958, lr_0 = 1.6367e-04
Loss = 3.3502e-03, PNorm = 154.5055, GNorm = 0.0285, lr_0 = 1.6355e-04
Loss = 1.2238e-03, PNorm = 154.5098, GNorm = 0.0938, lr_0 = 1.6344e-04
Loss = 1.6426e-03, PNorm = 154.5131, GNorm = 0.1461, lr_0 = 1.6333e-04
Loss = 9.4191e-04, PNorm = 154.5156, GNorm = 0.0276, lr_0 = 1.6322e-04
Loss = 1.1506e-03, PNorm = 154.5165, GNorm = 0.1057, lr_0 = 1.6311e-04
Loss = 2.4422e-03, PNorm = 154.5179, GNorm = 0.1334, lr_0 = 1.6299e-04
Loss = 2.7204e-03, PNorm = 154.5199, GNorm = 0.0787, lr_0 = 1.6288e-04
Loss = 2.5565e-03, PNorm = 154.5242, GNorm = 0.0952, lr_0 = 1.6277e-04
Loss = 1.2741e-03, PNorm = 154.5239, GNorm = 0.0333, lr_0 = 1.6266e-04
Loss = 1.8997e-03, PNorm = 154.5219, GNorm = 0.0754, lr_0 = 1.6255e-04
Loss = 1.0789e-03, PNorm = 154.5234, GNorm = 0.0322, lr_0 = 1.6244e-04
Loss = 1.5000e-03, PNorm = 154.5263, GNorm = 0.0303, lr_0 = 1.6233e-04
Loss = 9.3844e-04, PNorm = 154.5289, GNorm = 0.0909, lr_0 = 1.6221e-04
Loss = 1.4343e-03, PNorm = 154.5329, GNorm = 0.1108, lr_0 = 1.6210e-04
Loss = 9.1895e-04, PNorm = 154.5350, GNorm = 0.0302, lr_0 = 1.6199e-04
Loss = 1.9124e-03, PNorm = 154.5360, GNorm = 0.0402, lr_0 = 1.6188e-04
Loss = 9.4992e-04, PNorm = 154.5370, GNorm = 0.0642, lr_0 = 1.6177e-04
Loss = 1.1685e-03, PNorm = 154.5384, GNorm = 0.0461, lr_0 = 1.6166e-04
Loss = 1.6323e-03, PNorm = 154.5407, GNorm = 0.0746, lr_0 = 1.6155e-04
Loss = 9.4731e-04, PNorm = 154.5423, GNorm = 0.1005, lr_0 = 1.6144e-04
Loss = 1.2901e-03, PNorm = 154.5456, GNorm = 0.0500, lr_0 = 1.6133e-04
Loss = 2.4669e-03, PNorm = 154.5480, GNorm = 0.1143, lr_0 = 1.6122e-04
Loss = 3.9842e-03, PNorm = 154.5495, GNorm = 0.1317, lr_0 = 1.6111e-04
Loss = 1.5515e-03, PNorm = 154.5515, GNorm = 0.0860, lr_0 = 1.6100e-04
Loss = 1.3996e-03, PNorm = 154.5547, GNorm = 0.0713, lr_0 = 1.6089e-04
Loss = 4.2059e-03, PNorm = 154.5591, GNorm = 0.2378, lr_0 = 1.6078e-04
Loss = 1.3413e-03, PNorm = 154.5599, GNorm = 0.1892, lr_0 = 1.6067e-04
Loss = 9.7577e-04, PNorm = 154.5619, GNorm = 0.1558, lr_0 = 1.6056e-04
Loss = 1.8587e-03, PNorm = 154.5630, GNorm = 0.0397, lr_0 = 1.6045e-04
Loss = 1.0108e-03, PNorm = 154.5649, GNorm = 0.1091, lr_0 = 1.6034e-04
Loss = 1.0146e-03, PNorm = 154.5664, GNorm = 0.0797, lr_0 = 1.6023e-04
Loss = 1.1584e-03, PNorm = 154.5679, GNorm = 0.1407, lr_0 = 1.6012e-04
Loss = 1.6243e-03, PNorm = 154.5692, GNorm = 0.0891, lr_0 = 1.6001e-04
Loss = 1.8412e-03, PNorm = 154.5716, GNorm = 0.0846, lr_0 = 1.5990e-04
Loss = 1.1652e-03, PNorm = 154.5751, GNorm = 0.0602, lr_0 = 1.5979e-04
Loss = 1.2682e-03, PNorm = 154.5780, GNorm = 0.0500, lr_0 = 1.5968e-04
Loss = 1.7494e-03, PNorm = 154.5791, GNorm = 0.2799, lr_0 = 1.5957e-04
Loss = 9.2216e-04, PNorm = 154.5803, GNorm = 0.0786, lr_0 = 1.5946e-04
Loss = 1.0862e-03, PNorm = 154.5845, GNorm = 0.0608, lr_0 = 1.5935e-04
Loss = 2.2432e-03, PNorm = 154.5877, GNorm = 0.0685, lr_0 = 1.5924e-04
Loss = 1.4654e-03, PNorm = 154.5913, GNorm = 0.1243, lr_0 = 1.5913e-04
Loss = 1.4730e-03, PNorm = 154.5946, GNorm = 0.0974, lr_0 = 1.5902e-04
Loss = 2.0491e-03, PNorm = 154.5970, GNorm = 0.1277, lr_0 = 1.5891e-04
Loss = 1.2721e-03, PNorm = 154.5981, GNorm = 0.1052, lr_0 = 1.5880e-04
Loss = 1.7826e-03, PNorm = 154.5992, GNorm = 0.0518, lr_0 = 1.5870e-04
Loss = 2.1520e-03, PNorm = 154.6017, GNorm = 0.1273, lr_0 = 1.5859e-04
Loss = 9.0228e-04, PNorm = 154.6052, GNorm = 0.1382, lr_0 = 1.5848e-04
Loss = 1.1868e-03, PNorm = 154.6077, GNorm = 0.1120, lr_0 = 1.5837e-04
Loss = 1.2872e-03, PNorm = 154.6119, GNorm = 0.0785, lr_0 = 1.5826e-04
Loss = 1.5429e-03, PNorm = 154.6170, GNorm = 0.1227, lr_0 = 1.5815e-04
Loss = 1.8711e-03, PNorm = 154.6193, GNorm = 0.1108, lr_0 = 1.5804e-04
Loss = 1.2899e-03, PNorm = 154.6192, GNorm = 0.1244, lr_0 = 1.5794e-04
Loss = 1.2995e-03, PNorm = 154.6215, GNorm = 0.1015, lr_0 = 1.5783e-04
Loss = 1.1545e-03, PNorm = 154.6232, GNorm = 0.1491, lr_0 = 1.5772e-04
Loss = 1.4309e-03, PNorm = 154.6272, GNorm = 0.1780, lr_0 = 1.5761e-04
Loss = 1.8283e-03, PNorm = 154.6302, GNorm = 0.1745, lr_0 = 1.5750e-04
Loss = 2.2674e-03, PNorm = 154.6314, GNorm = 0.1543, lr_0 = 1.5740e-04
Loss = 2.1016e-03, PNorm = 154.6350, GNorm = 0.2245, lr_0 = 1.5729e-04
Loss = 2.5655e-03, PNorm = 154.6373, GNorm = 0.0488, lr_0 = 1.5718e-04
Loss = 1.9385e-03, PNorm = 154.6401, GNorm = 0.1809, lr_0 = 1.5707e-04
Loss = 1.0243e-03, PNorm = 154.6452, GNorm = 0.0661, lr_0 = 1.5697e-04
Loss = 1.6393e-03, PNorm = 154.6473, GNorm = 0.2651, lr_0 = 1.5686e-04
Loss = 1.2387e-03, PNorm = 154.6493, GNorm = 0.0843, lr_0 = 1.5675e-04
Loss = 1.3945e-03, PNorm = 154.6501, GNorm = 0.1465, lr_0 = 1.5664e-04
Loss = 1.4417e-03, PNorm = 154.6521, GNorm = 0.2489, lr_0 = 1.5654e-04
Loss = 1.6854e-03, PNorm = 154.6545, GNorm = 0.0838, lr_0 = 1.5643e-04
Loss = 1.6795e-03, PNorm = 154.6559, GNorm = 0.0880, lr_0 = 1.5632e-04
Loss = 1.6715e-03, PNorm = 154.6595, GNorm = 0.1071, lr_0 = 1.5621e-04
Loss = 1.1302e-03, PNorm = 154.6629, GNorm = 0.1286, lr_0 = 1.5611e-04
Loss = 1.0946e-03, PNorm = 154.6660, GNorm = 0.0733, lr_0 = 1.5600e-04
Loss = 9.4998e-04, PNorm = 154.6670, GNorm = 0.0673, lr_0 = 1.5589e-04
Loss = 1.1409e-03, PNorm = 154.6681, GNorm = 0.1050, lr_0 = 1.5579e-04
Loss = 8.0570e-04, PNorm = 154.6684, GNorm = 0.1408, lr_0 = 1.5568e-04
Loss = 1.0887e-03, PNorm = 154.6692, GNorm = 0.0611, lr_0 = 1.5557e-04
Loss = 1.5753e-03, PNorm = 154.6695, GNorm = 0.3541, lr_0 = 1.5547e-04
Loss = 1.3786e-03, PNorm = 154.6705, GNorm = 0.0811, lr_0 = 1.5536e-04
Loss = 9.9898e-04, PNorm = 154.6735, GNorm = 0.0508, lr_0 = 1.5525e-04
Loss = 1.2177e-03, PNorm = 154.6779, GNorm = 0.0983, lr_0 = 1.5515e-04
Loss = 1.1548e-03, PNorm = 154.6805, GNorm = 0.0541, lr_0 = 1.5504e-04
Loss = 2.4443e-03, PNorm = 154.6804, GNorm = 0.1918, lr_0 = 1.5493e-04
Loss = 4.5933e-03, PNorm = 154.6804, GNorm = 0.1992, lr_0 = 1.5483e-04
Loss = 1.1580e-03, PNorm = 154.6832, GNorm = 0.1377, lr_0 = 1.5472e-04
Loss = 4.7503e-03, PNorm = 154.6857, GNorm = 0.0300, lr_0 = 1.5462e-04
Loss = 2.5666e-03, PNorm = 154.6866, GNorm = 0.1063, lr_0 = 1.5451e-04
Loss = 1.6418e-03, PNorm = 154.6894, GNorm = 0.1860, lr_0 = 1.5440e-04
Loss = 1.4198e-03, PNorm = 154.6901, GNorm = 0.0432, lr_0 = 1.5430e-04
Loss = 1.0234e-03, PNorm = 154.6920, GNorm = 0.0803, lr_0 = 1.5419e-04
Loss = 9.4828e-04, PNorm = 154.6930, GNorm = 0.0875, lr_0 = 1.5409e-04
Loss = 9.7153e-04, PNorm = 154.6930, GNorm = 0.1304, lr_0 = 1.5398e-04
Loss = 1.9637e-03, PNorm = 154.6956, GNorm = 0.1388, lr_0 = 1.5388e-04
Loss = 1.1475e-03, PNorm = 154.7000, GNorm = 0.0315, lr_0 = 1.5377e-04
Loss = 1.4715e-03, PNorm = 154.6992, GNorm = 0.1411, lr_0 = 1.5367e-04
Loss = 1.1254e-03, PNorm = 154.7021, GNorm = 0.1095, lr_0 = 1.5356e-04
Loss = 2.8816e-03, PNorm = 154.7039, GNorm = 0.0796, lr_0 = 1.5346e-04
Loss = 1.5430e-03, PNorm = 154.7068, GNorm = 0.1180, lr_0 = 1.5335e-04
Loss = 2.8047e-03, PNorm = 154.7111, GNorm = 0.1469, lr_0 = 1.5325e-04
Loss = 1.5746e-03, PNorm = 154.7146, GNorm = 0.0806, lr_0 = 1.5314e-04
Loss = 1.5273e-03, PNorm = 154.7191, GNorm = 0.1120, lr_0 = 1.5304e-04
Loss = 1.0647e-03, PNorm = 154.7223, GNorm = 0.1033, lr_0 = 1.5293e-04
Loss = 1.3165e-03, PNorm = 154.7265, GNorm = 0.1522, lr_0 = 1.5283e-04
Loss = 1.6970e-03, PNorm = 154.7287, GNorm = 0.0904, lr_0 = 1.5272e-04
Loss = 1.1440e-03, PNorm = 154.7332, GNorm = 0.0696, lr_0 = 1.5262e-04
Loss = 1.0008e-03, PNorm = 154.7360, GNorm = 0.1650, lr_0 = 1.5251e-04
Loss = 1.0285e-03, PNorm = 154.7407, GNorm = 0.0676, lr_0 = 1.5241e-04
Loss = 1.6265e-03, PNorm = 154.7434, GNorm = 0.2196, lr_0 = 1.5230e-04
Loss = 1.6785e-03, PNorm = 154.7458, GNorm = 0.0545, lr_0 = 1.5220e-04
Loss = 1.1273e-03, PNorm = 154.7483, GNorm = 0.0687, lr_0 = 1.5209e-04
Loss = 1.9722e-03, PNorm = 154.7518, GNorm = 0.0983, lr_0 = 1.5199e-04
Loss = 2.8921e-03, PNorm = 154.7546, GNorm = 0.2296, lr_0 = 1.5189e-04
Loss = 1.0359e-03, PNorm = 154.7578, GNorm = 0.0675, lr_0 = 1.5178e-04
Loss = 1.4342e-03, PNorm = 154.7601, GNorm = 0.1181, lr_0 = 1.5168e-04
Loss = 2.9097e-03, PNorm = 154.7645, GNorm = 0.0603, lr_0 = 1.5157e-04
Loss = 1.1192e-03, PNorm = 154.7691, GNorm = 0.0607, lr_0 = 1.5147e-04
Loss = 1.5387e-03, PNorm = 154.7725, GNorm = 0.0859, lr_0 = 1.5137e-04
Loss = 1.5251e-03, PNorm = 154.7754, GNorm = 0.0868, lr_0 = 1.5126e-04
Loss = 5.2143e-03, PNorm = 154.7759, GNorm = 0.0967, lr_0 = 1.5116e-04
Loss = 2.1978e-03, PNorm = 154.7790, GNorm = 0.2091, lr_0 = 1.5106e-04
Loss = 2.9508e-03, PNorm = 154.7811, GNorm = 0.0705, lr_0 = 1.5095e-04
Loss = 1.3417e-03, PNorm = 154.7846, GNorm = 0.3397, lr_0 = 1.5085e-04
Validation mae = 0.475093
Epoch 25
Loss = 9.9006e-04, PNorm = 154.7881, GNorm = 0.1021, lr_0 = 1.5075e-04
Loss = 1.2023e-03, PNorm = 154.7908, GNorm = 0.0468, lr_0 = 1.5064e-04
Loss = 2.7847e-03, PNorm = 154.7937, GNorm = 0.0785, lr_0 = 1.5054e-04
Loss = 9.0679e-04, PNorm = 154.7950, GNorm = 0.0395, lr_0 = 1.5044e-04
Loss = 1.1158e-03, PNorm = 154.7975, GNorm = 0.0995, lr_0 = 1.5033e-04
Loss = 3.6457e-03, PNorm = 154.7983, GNorm = 0.2233, lr_0 = 1.5023e-04
Loss = 1.7343e-03, PNorm = 154.8011, GNorm = 0.1599, lr_0 = 1.5013e-04
Loss = 9.4093e-04, PNorm = 154.8025, GNorm = 0.1681, lr_0 = 1.5002e-04
Loss = 8.3466e-04, PNorm = 154.8027, GNorm = 0.0839, lr_0 = 1.4992e-04
Loss = 1.4518e-03, PNorm = 154.8042, GNorm = 0.0688, lr_0 = 1.4982e-04
Loss = 2.3517e-03, PNorm = 154.8061, GNorm = 0.0665, lr_0 = 1.4972e-04
Loss = 2.5134e-03, PNorm = 154.8104, GNorm = 0.1199, lr_0 = 1.4961e-04
Loss = 1.7805e-03, PNorm = 154.8119, GNorm = 0.0443, lr_0 = 1.4951e-04
Loss = 1.0189e-03, PNorm = 154.8153, GNorm = 0.1541, lr_0 = 1.4941e-04
Loss = 1.1966e-03, PNorm = 154.8186, GNorm = 0.1085, lr_0 = 1.4931e-04
Loss = 8.6007e-04, PNorm = 154.8210, GNorm = 0.0557, lr_0 = 1.4920e-04
Loss = 1.4861e-03, PNorm = 154.8210, GNorm = 0.1453, lr_0 = 1.4910e-04
Loss = 1.6641e-03, PNorm = 154.8233, GNorm = 0.1406, lr_0 = 1.4900e-04
Loss = 8.2237e-04, PNorm = 154.8240, GNorm = 0.1420, lr_0 = 1.4890e-04
Loss = 8.4899e-04, PNorm = 154.8251, GNorm = 0.1012, lr_0 = 1.4880e-04
Loss = 2.8984e-03, PNorm = 154.8275, GNorm = 0.1693, lr_0 = 1.4869e-04
Loss = 2.5394e-03, PNorm = 154.8306, GNorm = 0.0358, lr_0 = 1.4859e-04
Loss = 8.3540e-04, PNorm = 154.8344, GNorm = 0.0232, lr_0 = 1.4849e-04
Loss = 1.3395e-03, PNorm = 154.8363, GNorm = 0.0831, lr_0 = 1.4839e-04
Loss = 9.5526e-04, PNorm = 154.8372, GNorm = 0.0845, lr_0 = 1.4829e-04
Loss = 3.1389e-03, PNorm = 154.8393, GNorm = 0.1388, lr_0 = 1.4818e-04
Loss = 9.1005e-04, PNorm = 154.8397, GNorm = 0.0723, lr_0 = 1.4808e-04
Loss = 9.0777e-04, PNorm = 154.8415, GNorm = 0.1038, lr_0 = 1.4798e-04
Loss = 1.5573e-03, PNorm = 154.8411, GNorm = 0.0645, lr_0 = 1.4788e-04
Loss = 1.4713e-03, PNorm = 154.8428, GNorm = 0.0657, lr_0 = 1.4778e-04
Loss = 1.5162e-03, PNorm = 154.8452, GNorm = 0.0869, lr_0 = 1.4768e-04
Loss = 1.3604e-03, PNorm = 154.8469, GNorm = 0.1642, lr_0 = 1.4758e-04
Loss = 8.9547e-04, PNorm = 154.8489, GNorm = 0.1174, lr_0 = 1.4748e-04
Loss = 9.1952e-04, PNorm = 154.8512, GNorm = 0.0279, lr_0 = 1.4737e-04
Loss = 2.8280e-03, PNorm = 154.8547, GNorm = 0.1552, lr_0 = 1.4727e-04
Loss = 9.4707e-04, PNorm = 154.8563, GNorm = 0.0457, lr_0 = 1.4717e-04
Loss = 2.1653e-03, PNorm = 154.8590, GNorm = 0.1418, lr_0 = 1.4707e-04
Loss = 3.7376e-03, PNorm = 154.8599, GNorm = 0.2616, lr_0 = 1.4697e-04
Loss = 1.5505e-03, PNorm = 154.8645, GNorm = 0.1013, lr_0 = 1.4687e-04
Loss = 1.2556e-03, PNorm = 154.8668, GNorm = 0.0828, lr_0 = 1.4677e-04
Loss = 1.4156e-03, PNorm = 154.8675, GNorm = 0.0709, lr_0 = 1.4667e-04
Loss = 1.4079e-03, PNorm = 154.8688, GNorm = 0.2513, lr_0 = 1.4657e-04
Loss = 8.6879e-04, PNorm = 154.8696, GNorm = 0.0734, lr_0 = 1.4647e-04
Loss = 1.6291e-03, PNorm = 154.8718, GNorm = 0.0733, lr_0 = 1.4637e-04
Loss = 9.6526e-04, PNorm = 154.8736, GNorm = 0.0611, lr_0 = 1.4627e-04
Loss = 1.5656e-03, PNorm = 154.8754, GNorm = 0.1251, lr_0 = 1.4617e-04
Loss = 2.1002e-03, PNorm = 154.8771, GNorm = 0.0897, lr_0 = 1.4607e-04
Loss = 1.4516e-03, PNorm = 154.8825, GNorm = 0.0675, lr_0 = 1.4597e-04
Loss = 8.7912e-04, PNorm = 154.8850, GNorm = 0.0783, lr_0 = 1.4587e-04
Loss = 1.2200e-03, PNorm = 154.8870, GNorm = 0.1748, lr_0 = 1.4577e-04
Loss = 1.0800e-03, PNorm = 154.8884, GNorm = 0.0922, lr_0 = 1.4567e-04
Loss = 1.3153e-03, PNorm = 154.8899, GNorm = 0.1031, lr_0 = 1.4557e-04
Loss = 9.1538e-04, PNorm = 154.8910, GNorm = 0.1791, lr_0 = 1.4547e-04
Loss = 2.4167e-03, PNorm = 154.8921, GNorm = 0.0312, lr_0 = 1.4537e-04
Loss = 7.7133e-04, PNorm = 154.8928, GNorm = 0.1559, lr_0 = 1.4527e-04
Loss = 7.8004e-04, PNorm = 154.8945, GNorm = 0.0732, lr_0 = 1.4517e-04
Loss = 2.0399e-03, PNorm = 154.8957, GNorm = 0.0785, lr_0 = 1.4507e-04
Loss = 1.6627e-03, PNorm = 154.8966, GNorm = 0.0234, lr_0 = 1.4497e-04
Loss = 1.5787e-03, PNorm = 154.8996, GNorm = 0.0609, lr_0 = 1.4487e-04
Loss = 1.0778e-03, PNorm = 154.9036, GNorm = 0.1835, lr_0 = 1.4477e-04
Loss = 1.7147e-03, PNorm = 154.9042, GNorm = 0.1017, lr_0 = 1.4467e-04
Loss = 8.3793e-04, PNorm = 154.9070, GNorm = 0.1094, lr_0 = 1.4457e-04
Loss = 9.0069e-04, PNorm = 154.9099, GNorm = 0.1025, lr_0 = 1.4447e-04
Loss = 1.1782e-03, PNorm = 154.9127, GNorm = 0.1321, lr_0 = 1.4438e-04
Loss = 1.0260e-03, PNorm = 154.9166, GNorm = 0.1434, lr_0 = 1.4428e-04
Loss = 1.8126e-03, PNorm = 154.9186, GNorm = 0.1433, lr_0 = 1.4418e-04
Loss = 1.1050e-03, PNorm = 154.9201, GNorm = 0.0957, lr_0 = 1.4408e-04
Loss = 1.5114e-03, PNorm = 154.9190, GNorm = 0.1019, lr_0 = 1.4398e-04
Loss = 1.1246e-03, PNorm = 154.9209, GNorm = 0.1431, lr_0 = 1.4388e-04
Loss = 1.0827e-03, PNorm = 154.9234, GNorm = 0.0558, lr_0 = 1.4378e-04
Loss = 1.9243e-03, PNorm = 154.9245, GNorm = 0.0622, lr_0 = 1.4368e-04
Loss = 1.5101e-03, PNorm = 154.9254, GNorm = 0.0881, lr_0 = 1.4359e-04
Loss = 1.0563e-03, PNorm = 154.9282, GNorm = 0.1080, lr_0 = 1.4349e-04
Loss = 1.1326e-03, PNorm = 154.9286, GNorm = 0.0962, lr_0 = 1.4339e-04
Loss = 4.4880e-03, PNorm = 154.9292, GNorm = 0.0420, lr_0 = 1.4329e-04
Loss = 1.1731e-03, PNorm = 154.9323, GNorm = 0.0937, lr_0 = 1.4319e-04
Loss = 1.5295e-03, PNorm = 154.9337, GNorm = 0.1539, lr_0 = 1.4310e-04
Loss = 3.8415e-03, PNorm = 154.9362, GNorm = 0.1265, lr_0 = 1.4300e-04
Loss = 1.9779e-03, PNorm = 154.9372, GNorm = 0.0234, lr_0 = 1.4290e-04
Loss = 1.1022e-03, PNorm = 154.9412, GNorm = 0.2168, lr_0 = 1.4280e-04
Loss = 8.5239e-04, PNorm = 154.9432, GNorm = 0.0577, lr_0 = 1.4270e-04
Loss = 1.0457e-03, PNorm = 154.9448, GNorm = 0.1169, lr_0 = 1.4261e-04
Loss = 9.2433e-04, PNorm = 154.9472, GNorm = 0.1029, lr_0 = 1.4251e-04
Loss = 2.3681e-03, PNorm = 154.9487, GNorm = 0.1896, lr_0 = 1.4241e-04
Loss = 9.8806e-04, PNorm = 154.9514, GNorm = 0.0541, lr_0 = 1.4231e-04
Loss = 8.7503e-04, PNorm = 154.9534, GNorm = 0.1036, lr_0 = 1.4222e-04
Loss = 2.2636e-03, PNorm = 154.9559, GNorm = 0.0881, lr_0 = 1.4212e-04
Loss = 1.9933e-03, PNorm = 154.9581, GNorm = 0.1215, lr_0 = 1.4202e-04
Loss = 8.2294e-04, PNorm = 154.9592, GNorm = 0.1071, lr_0 = 1.4192e-04
Loss = 1.8052e-03, PNorm = 154.9622, GNorm = 0.1121, lr_0 = 1.4183e-04
Loss = 1.1070e-03, PNorm = 154.9640, GNorm = 0.0774, lr_0 = 1.4173e-04
Loss = 2.0058e-03, PNorm = 154.9665, GNorm = 0.1697, lr_0 = 1.4163e-04
Loss = 1.4296e-03, PNorm = 154.9674, GNorm = 0.1127, lr_0 = 1.4153e-04
Loss = 3.8308e-03, PNorm = 154.9705, GNorm = 0.1308, lr_0 = 1.4144e-04
Loss = 3.4132e-03, PNorm = 154.9745, GNorm = 0.0753, lr_0 = 1.4134e-04
Loss = 1.9671e-03, PNorm = 154.9772, GNorm = 0.0464, lr_0 = 1.4124e-04
Loss = 1.1202e-03, PNorm = 154.9796, GNorm = 0.0661, lr_0 = 1.4115e-04
Loss = 1.3318e-03, PNorm = 154.9818, GNorm = 0.1261, lr_0 = 1.4105e-04
Loss = 9.7217e-04, PNorm = 154.9839, GNorm = 0.0851, lr_0 = 1.4095e-04
Loss = 1.1611e-03, PNorm = 154.9878, GNorm = 0.0773, lr_0 = 1.4086e-04
Loss = 1.6197e-03, PNorm = 154.9894, GNorm = 0.1465, lr_0 = 1.4076e-04
Loss = 8.4809e-04, PNorm = 154.9907, GNorm = 0.0394, lr_0 = 1.4066e-04
Loss = 2.8915e-03, PNorm = 154.9932, GNorm = 0.0968, lr_0 = 1.4057e-04
Loss = 9.0599e-04, PNorm = 154.9947, GNorm = 0.0876, lr_0 = 1.4047e-04
Loss = 7.8410e-04, PNorm = 154.9959, GNorm = 0.1387, lr_0 = 1.4038e-04
Loss = 1.0459e-03, PNorm = 154.9980, GNorm = 0.0910, lr_0 = 1.4028e-04
Loss = 1.5865e-03, PNorm = 155.0007, GNorm = 0.0230, lr_0 = 1.4018e-04
Loss = 8.4171e-04, PNorm = 155.0038, GNorm = 0.1553, lr_0 = 1.4009e-04
Loss = 2.5004e-03, PNorm = 155.0056, GNorm = 0.1862, lr_0 = 1.3999e-04
Loss = 1.7660e-03, PNorm = 155.0073, GNorm = 0.1109, lr_0 = 1.3990e-04
Loss = 7.1952e-04, PNorm = 155.0086, GNorm = 0.0494, lr_0 = 1.3980e-04
Loss = 8.8984e-04, PNorm = 155.0082, GNorm = 0.0350, lr_0 = 1.3970e-04
Loss = 8.8374e-04, PNorm = 155.0096, GNorm = 0.0500, lr_0 = 1.3961e-04
Loss = 1.3381e-03, PNorm = 155.0121, GNorm = 0.0721, lr_0 = 1.3951e-04
Loss = 1.2195e-03, PNorm = 155.0139, GNorm = 0.0333, lr_0 = 1.3942e-04
Loss = 1.4227e-03, PNorm = 155.0149, GNorm = 0.0725, lr_0 = 1.3932e-04
Loss = 9.1745e-04, PNorm = 155.0155, GNorm = 0.0582, lr_0 = 1.3923e-04
Loss = 2.3599e-03, PNorm = 155.0173, GNorm = 0.0661, lr_0 = 1.3913e-04
Loss = 8.1715e-04, PNorm = 155.0191, GNorm = 0.0931, lr_0 = 1.3904e-04
Loss = 1.4564e-03, PNorm = 155.0219, GNorm = 0.0930, lr_0 = 1.3894e-04
Validation mae = 0.474563
Epoch 26
Loss = 1.4706e-03, PNorm = 155.0238, GNorm = 0.0599, lr_0 = 1.3884e-04
Loss = 1.8485e-03, PNorm = 155.0246, GNorm = 0.1790, lr_0 = 1.3875e-04
Loss = 2.3033e-03, PNorm = 155.0265, GNorm = 0.2274, lr_0 = 1.3865e-04
Loss = 1.4844e-03, PNorm = 155.0292, GNorm = 0.0249, lr_0 = 1.3856e-04
Loss = 9.4265e-04, PNorm = 155.0310, GNorm = 0.1182, lr_0 = 1.3846e-04
Loss = 1.0593e-03, PNorm = 155.0339, GNorm = 0.1201, lr_0 = 1.3837e-04
Loss = 8.6087e-04, PNorm = 155.0383, GNorm = 0.0960, lr_0 = 1.3828e-04
Loss = 6.2265e-04, PNorm = 155.0411, GNorm = 0.0346, lr_0 = 1.3818e-04
Loss = 1.1625e-03, PNorm = 155.0429, GNorm = 0.0754, lr_0 = 1.3809e-04
Loss = 6.4286e-04, PNorm = 155.0439, GNorm = 0.0572, lr_0 = 1.3799e-04
Loss = 1.3042e-03, PNorm = 155.0465, GNorm = 0.0538, lr_0 = 1.3790e-04
Loss = 6.9705e-04, PNorm = 155.0482, GNorm = 0.0891, lr_0 = 1.3780e-04
Loss = 1.4179e-03, PNorm = 155.0503, GNorm = 0.0775, lr_0 = 1.3771e-04
Loss = 1.4183e-03, PNorm = 155.0522, GNorm = 0.1016, lr_0 = 1.3761e-04
Loss = 2.0028e-03, PNorm = 155.0526, GNorm = 0.0414, lr_0 = 1.3752e-04
Loss = 1.5718e-03, PNorm = 155.0537, GNorm = 0.2293, lr_0 = 1.3742e-04
Loss = 6.8135e-04, PNorm = 155.0571, GNorm = 0.1209, lr_0 = 1.3733e-04
Loss = 2.5277e-03, PNorm = 155.0600, GNorm = 0.1931, lr_0 = 1.3724e-04
Loss = 1.1706e-03, PNorm = 155.0605, GNorm = 0.1516, lr_0 = 1.3714e-04
Loss = 9.1485e-04, PNorm = 155.0628, GNorm = 0.1284, lr_0 = 1.3705e-04
Loss = 1.7156e-03, PNorm = 155.0642, GNorm = 0.1329, lr_0 = 1.3695e-04
Loss = 8.7160e-04, PNorm = 155.0673, GNorm = 0.1135, lr_0 = 1.3686e-04
Loss = 7.8593e-04, PNorm = 155.0695, GNorm = 0.1299, lr_0 = 1.3677e-04
Loss = 1.6907e-03, PNorm = 155.0701, GNorm = 0.0307, lr_0 = 1.3667e-04
Loss = 7.6067e-04, PNorm = 155.0708, GNorm = 0.2024, lr_0 = 1.3658e-04
Loss = 4.0430e-03, PNorm = 155.0730, GNorm = 0.1366, lr_0 = 1.3649e-04
Loss = 1.2897e-03, PNorm = 155.0765, GNorm = 0.0528, lr_0 = 1.3639e-04
Loss = 1.0647e-03, PNorm = 155.0801, GNorm = 0.2608, lr_0 = 1.3630e-04
Loss = 3.1342e-03, PNorm = 155.0828, GNorm = 0.0773, lr_0 = 1.3621e-04
Loss = 8.2498e-04, PNorm = 155.0841, GNorm = 0.1199, lr_0 = 1.3611e-04
Loss = 3.3337e-03, PNorm = 155.0843, GNorm = 0.0852, lr_0 = 1.3602e-04
Loss = 9.5071e-04, PNorm = 155.0855, GNorm = 0.1949, lr_0 = 1.3593e-04
Loss = 1.7229e-03, PNorm = 155.0867, GNorm = 0.1660, lr_0 = 1.3583e-04
Loss = 1.0471e-03, PNorm = 155.0891, GNorm = 0.0436, lr_0 = 1.3574e-04
Loss = 1.1055e-03, PNorm = 155.0920, GNorm = 0.1043, lr_0 = 1.3565e-04
Loss = 1.9983e-03, PNorm = 155.0913, GNorm = 0.0646, lr_0 = 1.3555e-04
Loss = 1.0741e-03, PNorm = 155.0932, GNorm = 0.1085, lr_0 = 1.3546e-04
Loss = 1.7567e-03, PNorm = 155.0955, GNorm = 0.1212, lr_0 = 1.3537e-04
Loss = 9.2834e-04, PNorm = 155.0979, GNorm = 0.0533, lr_0 = 1.3528e-04
Loss = 1.6568e-03, PNorm = 155.1009, GNorm = 0.0538, lr_0 = 1.3518e-04
Loss = 1.9415e-03, PNorm = 155.1034, GNorm = 0.0286, lr_0 = 1.3509e-04
Loss = 2.7065e-03, PNorm = 155.1043, GNorm = 0.0496, lr_0 = 1.3500e-04
Loss = 1.4239e-03, PNorm = 155.1057, GNorm = 0.0766, lr_0 = 1.3491e-04
Loss = 1.4577e-03, PNorm = 155.1052, GNorm = 0.1371, lr_0 = 1.3481e-04
Loss = 1.5309e-03, PNorm = 155.1072, GNorm = 0.1163, lr_0 = 1.3472e-04
Loss = 2.5113e-03, PNorm = 155.1083, GNorm = 0.1573, lr_0 = 1.3463e-04
Loss = 1.5617e-03, PNorm = 155.1113, GNorm = 0.0868, lr_0 = 1.3454e-04
Loss = 8.5621e-04, PNorm = 155.1128, GNorm = 0.0669, lr_0 = 1.3444e-04
Loss = 1.6510e-03, PNorm = 155.1154, GNorm = 0.1309, lr_0 = 1.3435e-04
Loss = 7.0765e-04, PNorm = 155.1172, GNorm = 0.1364, lr_0 = 1.3426e-04
Loss = 8.0769e-04, PNorm = 155.1194, GNorm = 0.1241, lr_0 = 1.3417e-04
Loss = 1.7063e-03, PNorm = 155.1230, GNorm = 0.1894, lr_0 = 1.3408e-04
Loss = 1.5610e-03, PNorm = 155.1253, GNorm = 0.0437, lr_0 = 1.3398e-04
Loss = 9.6928e-04, PNorm = 155.1271, GNorm = 0.0301, lr_0 = 1.3389e-04
Loss = 8.9846e-04, PNorm = 155.1275, GNorm = 0.0247, lr_0 = 1.3380e-04
Loss = 1.3884e-03, PNorm = 155.1278, GNorm = 0.1320, lr_0 = 1.3371e-04
Loss = 8.5126e-04, PNorm = 155.1305, GNorm = 0.0895, lr_0 = 1.3362e-04
Loss = 1.3454e-03, PNorm = 155.1312, GNorm = 0.1276, lr_0 = 1.3353e-04
Loss = 1.0660e-03, PNorm = 155.1332, GNorm = 0.1607, lr_0 = 1.3343e-04
Loss = 1.0505e-03, PNorm = 155.1354, GNorm = 0.0801, lr_0 = 1.3334e-04
Loss = 8.9004e-04, PNorm = 155.1384, GNorm = 0.0681, lr_0 = 1.3325e-04
Loss = 1.1479e-03, PNorm = 155.1400, GNorm = 0.0655, lr_0 = 1.3316e-04
Loss = 2.1070e-03, PNorm = 155.1420, GNorm = 0.0636, lr_0 = 1.3307e-04
Loss = 1.3001e-03, PNorm = 155.1434, GNorm = 0.0525, lr_0 = 1.3298e-04
Loss = 8.3966e-04, PNorm = 155.1456, GNorm = 0.0726, lr_0 = 1.3289e-04
Loss = 1.0779e-03, PNorm = 155.1481, GNorm = 0.0345, lr_0 = 1.3280e-04
Loss = 8.0151e-04, PNorm = 155.1502, GNorm = 0.0387, lr_0 = 1.3270e-04
Loss = 3.4997e-03, PNorm = 155.1510, GNorm = 0.1394, lr_0 = 1.3261e-04
Loss = 6.9381e-04, PNorm = 155.1503, GNorm = 0.1637, lr_0 = 1.3252e-04
Loss = 1.6138e-03, PNorm = 155.1541, GNorm = 0.0440, lr_0 = 1.3243e-04
Loss = 1.6015e-03, PNorm = 155.1586, GNorm = 0.0723, lr_0 = 1.3234e-04
Loss = 1.3726e-03, PNorm = 155.1619, GNorm = 0.0797, lr_0 = 1.3225e-04
Loss = 2.0192e-03, PNorm = 155.1650, GNorm = 0.1154, lr_0 = 1.3216e-04
Loss = 8.2953e-04, PNorm = 155.1643, GNorm = 0.0830, lr_0 = 1.3207e-04
Loss = 6.9286e-04, PNorm = 155.1649, GNorm = 0.1075, lr_0 = 1.3198e-04
Loss = 6.9517e-04, PNorm = 155.1667, GNorm = 0.1256, lr_0 = 1.3189e-04
Loss = 8.2324e-04, PNorm = 155.1687, GNorm = 0.1317, lr_0 = 1.3180e-04
Loss = 2.3832e-03, PNorm = 155.1693, GNorm = 0.1080, lr_0 = 1.3171e-04
Loss = 2.0403e-03, PNorm = 155.1702, GNorm = 0.1982, lr_0 = 1.3162e-04
Loss = 5.7783e-04, PNorm = 155.1720, GNorm = 0.1181, lr_0 = 1.3153e-04
Loss = 1.2083e-03, PNorm = 155.1735, GNorm = 0.0399, lr_0 = 1.3144e-04
Loss = 7.3943e-04, PNorm = 155.1739, GNorm = 0.0324, lr_0 = 1.3135e-04
Loss = 9.4909e-04, PNorm = 155.1737, GNorm = 0.0356, lr_0 = 1.3126e-04
Loss = 1.9517e-03, PNorm = 155.1738, GNorm = 0.1014, lr_0 = 1.3117e-04
Loss = 9.1276e-04, PNorm = 155.1745, GNorm = 0.0506, lr_0 = 1.3108e-04
Loss = 7.4395e-04, PNorm = 155.1773, GNorm = 0.1113, lr_0 = 1.3099e-04
Loss = 1.3414e-03, PNorm = 155.1796, GNorm = 0.1009, lr_0 = 1.3090e-04
Loss = 1.4730e-03, PNorm = 155.1816, GNorm = 0.1245, lr_0 = 1.3081e-04
Loss = 1.3244e-03, PNorm = 155.1851, GNorm = 0.1066, lr_0 = 1.3072e-04
Loss = 1.4416e-03, PNorm = 155.1877, GNorm = 0.0705, lr_0 = 1.3063e-04
Loss = 1.1579e-03, PNorm = 155.1891, GNorm = 0.0533, lr_0 = 1.3054e-04
Loss = 1.9913e-03, PNorm = 155.1902, GNorm = 0.2042, lr_0 = 1.3045e-04
Loss = 1.2261e-03, PNorm = 155.1899, GNorm = 0.0904, lr_0 = 1.3036e-04
Loss = 1.5438e-03, PNorm = 155.1905, GNorm = 0.0898, lr_0 = 1.3027e-04
Loss = 6.6941e-04, PNorm = 155.1941, GNorm = 0.1703, lr_0 = 1.3018e-04
Loss = 1.6687e-03, PNorm = 155.1948, GNorm = 0.1486, lr_0 = 1.3009e-04
Loss = 1.6891e-03, PNorm = 155.1956, GNorm = 0.1323, lr_0 = 1.3000e-04
Loss = 1.8316e-03, PNorm = 155.1963, GNorm = 0.0904, lr_0 = 1.2992e-04
Loss = 9.0183e-04, PNorm = 155.1969, GNorm = 0.0394, lr_0 = 1.2983e-04
Loss = 1.7849e-03, PNorm = 155.1974, GNorm = 0.1605, lr_0 = 1.2974e-04
Loss = 2.6656e-03, PNorm = 155.1977, GNorm = 0.0438, lr_0 = 1.2965e-04
Loss = 6.6669e-04, PNorm = 155.1998, GNorm = 0.0705, lr_0 = 1.2956e-04
Loss = 1.3577e-03, PNorm = 155.2021, GNorm = 0.1775, lr_0 = 1.2947e-04
Loss = 1.1239e-03, PNorm = 155.2059, GNorm = 0.0361, lr_0 = 1.2938e-04
Loss = 1.4308e-03, PNorm = 155.2071, GNorm = 0.0888, lr_0 = 1.2929e-04
Loss = 2.5235e-03, PNorm = 155.2092, GNorm = 0.0704, lr_0 = 1.2921e-04
Loss = 1.7424e-03, PNorm = 155.2084, GNorm = 0.1558, lr_0 = 1.2912e-04
Loss = 1.3304e-03, PNorm = 155.2115, GNorm = 0.1225, lr_0 = 1.2903e-04
Loss = 9.4803e-04, PNorm = 155.2139, GNorm = 0.0858, lr_0 = 1.2894e-04
Loss = 1.3617e-03, PNorm = 155.2158, GNorm = 0.0888, lr_0 = 1.2885e-04
Loss = 9.1597e-04, PNorm = 155.2188, GNorm = 0.0590, lr_0 = 1.2876e-04
Loss = 7.4776e-04, PNorm = 155.2206, GNorm = 0.0268, lr_0 = 1.2867e-04
Loss = 1.4943e-03, PNorm = 155.2223, GNorm = 0.0348, lr_0 = 1.2859e-04
Loss = 1.3035e-03, PNorm = 155.2244, GNorm = 0.0717, lr_0 = 1.2850e-04
Loss = 2.3820e-03, PNorm = 155.2261, GNorm = 0.0736, lr_0 = 1.2841e-04
Loss = 3.7805e-03, PNorm = 155.2280, GNorm = 0.1179, lr_0 = 1.2832e-04
Loss = 7.6429e-04, PNorm = 155.2287, GNorm = 0.0724, lr_0 = 1.2823e-04
Loss = 8.6301e-04, PNorm = 155.2313, GNorm = 0.1355, lr_0 = 1.2815e-04
Loss = 1.2387e-03, PNorm = 155.2333, GNorm = 0.1513, lr_0 = 1.2806e-04
Loss = 6.5485e-04, PNorm = 155.2370, GNorm = 0.0695, lr_0 = 1.2797e-04
Validation mae = 0.475328
Epoch 27
Loss = 1.2325e-03, PNorm = 155.2386, GNorm = 0.0820, lr_0 = 1.2788e-04
Loss = 8.1522e-04, PNorm = 155.2388, GNorm = 0.0884, lr_0 = 1.2780e-04
Loss = 6.3757e-04, PNorm = 155.2386, GNorm = 0.0832, lr_0 = 1.2771e-04
Loss = 1.4183e-03, PNorm = 155.2389, GNorm = 0.0628, lr_0 = 1.2762e-04
Loss = 5.4842e-04, PNorm = 155.2407, GNorm = 0.1184, lr_0 = 1.2753e-04
Loss = 1.7874e-03, PNorm = 155.2436, GNorm = 0.0875, lr_0 = 1.2745e-04
Loss = 9.7065e-04, PNorm = 155.2469, GNorm = 0.1087, lr_0 = 1.2736e-04
Loss = 1.4610e-03, PNorm = 155.2490, GNorm = 0.1427, lr_0 = 1.2727e-04
Loss = 1.5192e-03, PNorm = 155.2501, GNorm = 0.0411, lr_0 = 1.2718e-04
Loss = 1.9760e-03, PNorm = 155.2505, GNorm = 0.2633, lr_0 = 1.2710e-04
Loss = 6.5011e-04, PNorm = 155.2516, GNorm = 0.0391, lr_0 = 1.2701e-04
Loss = 1.5204e-03, PNorm = 155.2531, GNorm = 0.0655, lr_0 = 1.2692e-04
Loss = 1.8449e-03, PNorm = 155.2541, GNorm = 0.0863, lr_0 = 1.2684e-04
Loss = 8.0282e-04, PNorm = 155.2548, GNorm = 0.1282, lr_0 = 1.2675e-04
Loss = 1.1495e-03, PNorm = 155.2560, GNorm = 0.0455, lr_0 = 1.2666e-04
Loss = 2.2362e-03, PNorm = 155.2562, GNorm = 0.1444, lr_0 = 1.2658e-04
Loss = 6.9896e-04, PNorm = 155.2563, GNorm = 0.0264, lr_0 = 1.2649e-04
Loss = 6.1127e-04, PNorm = 155.2578, GNorm = 0.1094, lr_0 = 1.2640e-04
Loss = 1.3634e-03, PNorm = 155.2608, GNorm = 0.0642, lr_0 = 1.2632e-04
Loss = 9.6184e-04, PNorm = 155.2621, GNorm = 0.0637, lr_0 = 1.2623e-04
Loss = 8.6316e-04, PNorm = 155.2632, GNorm = 0.0940, lr_0 = 1.2614e-04
Loss = 6.4277e-04, PNorm = 155.2651, GNorm = 0.0884, lr_0 = 1.2606e-04
Loss = 1.6641e-03, PNorm = 155.2664, GNorm = 0.0360, lr_0 = 1.2597e-04
Loss = 8.1159e-04, PNorm = 155.2676, GNorm = 0.1511, lr_0 = 1.2588e-04
Loss = 1.1712e-03, PNorm = 155.2677, GNorm = 0.0741, lr_0 = 1.2580e-04
Loss = 2.1983e-03, PNorm = 155.2701, GNorm = 0.0962, lr_0 = 1.2571e-04
Loss = 2.5034e-03, PNorm = 155.2711, GNorm = 0.1382, lr_0 = 1.2563e-04
Loss = 1.5453e-03, PNorm = 155.2718, GNorm = 0.0830, lr_0 = 1.2554e-04
Loss = 9.7207e-04, PNorm = 155.2713, GNorm = 0.0723, lr_0 = 1.2545e-04
Loss = 6.4022e-04, PNorm = 155.2726, GNorm = 0.0502, lr_0 = 1.2537e-04
Loss = 8.1945e-04, PNorm = 155.2746, GNorm = 0.0877, lr_0 = 1.2528e-04
Loss = 2.0419e-03, PNorm = 155.2755, GNorm = 0.0510, lr_0 = 1.2520e-04
Loss = 2.6577e-03, PNorm = 155.2776, GNorm = 0.1230, lr_0 = 1.2511e-04
Loss = 5.8238e-04, PNorm = 155.2775, GNorm = 0.1050, lr_0 = 1.2502e-04
Loss = 9.0758e-04, PNorm = 155.2770, GNorm = 0.1505, lr_0 = 1.2494e-04
Loss = 1.0132e-03, PNorm = 155.2785, GNorm = 0.0514, lr_0 = 1.2485e-04
Loss = 9.3107e-04, PNorm = 155.2797, GNorm = 0.0814, lr_0 = 1.2477e-04
Loss = 4.2453e-03, PNorm = 155.2812, GNorm = 0.0995, lr_0 = 1.2468e-04
Loss = 7.1692e-04, PNorm = 155.2835, GNorm = 0.2380, lr_0 = 1.2460e-04
Loss = 7.3189e-04, PNorm = 155.2853, GNorm = 0.0777, lr_0 = 1.2451e-04
Loss = 6.1851e-04, PNorm = 155.2887, GNorm = 0.0778, lr_0 = 1.2443e-04
Loss = 7.4213e-04, PNorm = 155.2912, GNorm = 0.0447, lr_0 = 1.2434e-04
Loss = 2.3921e-03, PNorm = 155.2920, GNorm = 0.0826, lr_0 = 1.2426e-04
Loss = 6.3613e-04, PNorm = 155.2927, GNorm = 0.0858, lr_0 = 1.2417e-04
Loss = 1.6521e-03, PNorm = 155.2949, GNorm = 0.1739, lr_0 = 1.2409e-04
Loss = 6.0524e-04, PNorm = 155.2973, GNorm = 0.0676, lr_0 = 1.2400e-04
Loss = 2.3305e-03, PNorm = 155.2987, GNorm = 0.1085, lr_0 = 1.2392e-04
Loss = 7.6557e-04, PNorm = 155.2999, GNorm = 0.0917, lr_0 = 1.2383e-04
Loss = 7.3420e-04, PNorm = 155.3014, GNorm = 0.0608, lr_0 = 1.2375e-04
Loss = 1.0910e-03, PNorm = 155.3029, GNorm = 0.0823, lr_0 = 1.2366e-04
Loss = 5.4431e-04, PNorm = 155.3028, GNorm = 0.0777, lr_0 = 1.2358e-04
Loss = 9.3748e-04, PNorm = 155.3038, GNorm = 0.0462, lr_0 = 1.2349e-04
Loss = 1.2877e-03, PNorm = 155.3050, GNorm = 0.1782, lr_0 = 1.2341e-04
Loss = 1.4634e-03, PNorm = 155.3084, GNorm = 0.0468, lr_0 = 1.2332e-04
Loss = 1.6570e-03, PNorm = 155.3126, GNorm = 0.0842, lr_0 = 1.2324e-04
Loss = 1.1595e-03, PNorm = 155.3152, GNorm = 0.0964, lr_0 = 1.2315e-04
Loss = 1.0862e-03, PNorm = 155.3172, GNorm = 0.1377, lr_0 = 1.2307e-04
Loss = 3.1933e-03, PNorm = 155.3185, GNorm = 0.0505, lr_0 = 1.2298e-04
Loss = 2.2163e-03, PNorm = 155.3211, GNorm = 0.2631, lr_0 = 1.2290e-04
Loss = 1.2936e-03, PNorm = 155.3232, GNorm = 0.0617, lr_0 = 1.2282e-04
Loss = 8.1583e-04, PNorm = 155.3240, GNorm = 0.0534, lr_0 = 1.2273e-04
Loss = 6.7934e-04, PNorm = 155.3254, GNorm = 0.0751, lr_0 = 1.2265e-04
Loss = 5.6000e-04, PNorm = 155.3287, GNorm = 0.0841, lr_0 = 1.2256e-04
Loss = 3.0629e-03, PNorm = 155.3309, GNorm = 0.2356, lr_0 = 1.2248e-04
Loss = 8.4124e-04, PNorm = 155.3333, GNorm = 0.0894, lr_0 = 1.2240e-04
Loss = 1.3457e-03, PNorm = 155.3363, GNorm = 0.1160, lr_0 = 1.2231e-04
Loss = 2.2720e-03, PNorm = 155.3379, GNorm = 0.0997, lr_0 = 1.2223e-04
Loss = 7.7014e-04, PNorm = 155.3394, GNorm = 0.0395, lr_0 = 1.2214e-04
Loss = 1.3254e-03, PNorm = 155.3393, GNorm = 0.0812, lr_0 = 1.2206e-04
Loss = 9.4659e-04, PNorm = 155.3398, GNorm = 0.1899, lr_0 = 1.2198e-04
Loss = 1.6566e-03, PNorm = 155.3396, GNorm = 0.1326, lr_0 = 1.2189e-04
Loss = 5.8825e-04, PNorm = 155.3392, GNorm = 0.0211, lr_0 = 1.2181e-04
Loss = 6.6582e-04, PNorm = 155.3412, GNorm = 0.0633, lr_0 = 1.2173e-04
Loss = 1.6926e-03, PNorm = 155.3421, GNorm = 0.0547, lr_0 = 1.2164e-04
Loss = 5.8756e-04, PNorm = 155.3436, GNorm = 0.1232, lr_0 = 1.2156e-04
Loss = 7.3880e-04, PNorm = 155.3444, GNorm = 0.0717, lr_0 = 1.2148e-04
Loss = 1.0596e-03, PNorm = 155.3464, GNorm = 0.0508, lr_0 = 1.2139e-04
Loss = 2.9603e-03, PNorm = 155.3483, GNorm = 0.1211, lr_0 = 1.2131e-04
Loss = 1.0331e-03, PNorm = 155.3506, GNorm = 0.0376, lr_0 = 1.2123e-04
Loss = 1.1601e-03, PNorm = 155.3515, GNorm = 0.0470, lr_0 = 1.2114e-04
Loss = 5.6649e-04, PNorm = 155.3528, GNorm = 0.1399, lr_0 = 1.2106e-04
Loss = 8.1618e-04, PNorm = 155.3533, GNorm = 0.1354, lr_0 = 1.2098e-04
Loss = 2.7560e-03, PNorm = 155.3544, GNorm = 0.1034, lr_0 = 1.2090e-04
Loss = 8.3251e-04, PNorm = 155.3564, GNorm = 0.1137, lr_0 = 1.2081e-04
Loss = 1.2936e-03, PNorm = 155.3601, GNorm = 0.0693, lr_0 = 1.2073e-04
Loss = 1.4908e-03, PNorm = 155.3615, GNorm = 0.1034, lr_0 = 1.2065e-04
Loss = 1.1767e-03, PNorm = 155.3615, GNorm = 0.1137, lr_0 = 1.2056e-04
Loss = 7.6926e-04, PNorm = 155.3623, GNorm = 0.0578, lr_0 = 1.2048e-04
Loss = 1.2486e-03, PNorm = 155.3625, GNorm = 0.0991, lr_0 = 1.2040e-04
Loss = 1.6007e-03, PNorm = 155.3630, GNorm = 0.1501, lr_0 = 1.2032e-04
Loss = 2.3406e-03, PNorm = 155.3655, GNorm = 0.1160, lr_0 = 1.2023e-04
Loss = 8.2283e-04, PNorm = 155.3685, GNorm = 0.0947, lr_0 = 1.2015e-04
Loss = 5.9831e-04, PNorm = 155.3714, GNorm = 0.0667, lr_0 = 1.2007e-04
Loss = 5.3224e-04, PNorm = 155.3741, GNorm = 0.0873, lr_0 = 1.1999e-04
Loss = 2.3389e-03, PNorm = 155.3740, GNorm = 0.1326, lr_0 = 1.1991e-04
Loss = 1.4047e-03, PNorm = 155.3743, GNorm = 0.0312, lr_0 = 1.1982e-04
Loss = 1.1346e-03, PNorm = 155.3748, GNorm = 0.0224, lr_0 = 1.1974e-04
Loss = 9.6630e-04, PNorm = 155.3765, GNorm = 0.0370, lr_0 = 1.1966e-04
Loss = 4.2817e-03, PNorm = 155.3787, GNorm = 0.0606, lr_0 = 1.1958e-04
Loss = 7.2800e-04, PNorm = 155.3810, GNorm = 0.0330, lr_0 = 1.1950e-04
Loss = 1.8267e-03, PNorm = 155.3821, GNorm = 0.4816, lr_0 = 1.1941e-04
Loss = 6.2341e-04, PNorm = 155.3829, GNorm = 0.0291, lr_0 = 1.1933e-04
Loss = 5.1879e-04, PNorm = 155.3834, GNorm = 0.0545, lr_0 = 1.1925e-04
Loss = 2.2612e-03, PNorm = 155.3845, GNorm = 0.0707, lr_0 = 1.1917e-04
Loss = 2.2529e-03, PNorm = 155.3847, GNorm = 0.0373, lr_0 = 1.1909e-04
Loss = 9.7236e-04, PNorm = 155.3864, GNorm = 0.0654, lr_0 = 1.1901e-04
Loss = 9.6378e-04, PNorm = 155.3891, GNorm = 0.1764, lr_0 = 1.1892e-04
Loss = 6.6997e-04, PNorm = 155.3899, GNorm = 0.1022, lr_0 = 1.1884e-04
Loss = 2.4934e-03, PNorm = 155.3919, GNorm = 0.1414, lr_0 = 1.1876e-04
Loss = 4.7745e-04, PNorm = 155.3941, GNorm = 0.0356, lr_0 = 1.1868e-04
Loss = 1.2199e-03, PNorm = 155.3967, GNorm = 0.1171, lr_0 = 1.1860e-04
Loss = 8.3384e-04, PNorm = 155.3988, GNorm = 0.0434, lr_0 = 1.1852e-04
Loss = 1.0251e-03, PNorm = 155.4000, GNorm = 0.1113, lr_0 = 1.1844e-04
Loss = 8.4398e-04, PNorm = 155.4005, GNorm = 0.0797, lr_0 = 1.1835e-04
Loss = 6.2415e-04, PNorm = 155.4013, GNorm = 0.0302, lr_0 = 1.1827e-04
Loss = 1.5179e-03, PNorm = 155.4016, GNorm = 0.0416, lr_0 = 1.1819e-04
Loss = 1.9474e-03, PNorm = 155.4044, GNorm = 0.0300, lr_0 = 1.1811e-04
Loss = 6.6088e-04, PNorm = 155.4057, GNorm = 0.1364, lr_0 = 1.1803e-04
Loss = 4.7619e-04, PNorm = 155.4073, GNorm = 0.0243, lr_0 = 1.1795e-04
Loss = 7.1282e-04, PNorm = 155.4078, GNorm = 0.1132, lr_0 = 1.1787e-04
Validation mae = 0.475201
Epoch 28
Loss = 4.9169e-04, PNorm = 155.4096, GNorm = 0.0526, lr_0 = 1.1779e-04
Loss = 1.9733e-03, PNorm = 155.4093, GNorm = 0.0797, lr_0 = 1.1771e-04
Loss = 1.0254e-03, PNorm = 155.4111, GNorm = 0.0830, lr_0 = 1.1763e-04
Loss = 5.9465e-04, PNorm = 155.4124, GNorm = 0.0732, lr_0 = 1.1755e-04
Loss = 5.9023e-04, PNorm = 155.4143, GNorm = 0.0747, lr_0 = 1.1747e-04
Loss = 1.1022e-03, PNorm = 155.4150, GNorm = 0.0545, lr_0 = 1.1739e-04
Loss = 4.8936e-04, PNorm = 155.4165, GNorm = 0.0276, lr_0 = 1.1730e-04
Loss = 1.2641e-03, PNorm = 155.4168, GNorm = 0.0599, lr_0 = 1.1722e-04
Loss = 1.0711e-03, PNorm = 155.4176, GNorm = 0.0575, lr_0 = 1.1714e-04
Loss = 6.7021e-04, PNorm = 155.4190, GNorm = 0.0712, lr_0 = 1.1706e-04
Loss = 1.5068e-03, PNorm = 155.4217, GNorm = 0.1342, lr_0 = 1.1698e-04
Loss = 6.9561e-04, PNorm = 155.4219, GNorm = 0.0951, lr_0 = 1.1690e-04
Loss = 1.1474e-03, PNorm = 155.4239, GNorm = 0.1158, lr_0 = 1.1682e-04
Loss = 1.1873e-03, PNorm = 155.4246, GNorm = 0.0796, lr_0 = 1.1674e-04
Loss = 5.9372e-04, PNorm = 155.4242, GNorm = 0.0680, lr_0 = 1.1666e-04
Loss = 1.4164e-03, PNorm = 155.4248, GNorm = 0.0938, lr_0 = 1.1658e-04
Loss = 6.2170e-04, PNorm = 155.4235, GNorm = 0.1904, lr_0 = 1.1650e-04
Loss = 6.1581e-04, PNorm = 155.4249, GNorm = 0.1266, lr_0 = 1.1642e-04
Loss = 7.7484e-04, PNorm = 155.4264, GNorm = 0.1068, lr_0 = 1.1634e-04
Loss = 6.8469e-04, PNorm = 155.4285, GNorm = 0.0844, lr_0 = 1.1626e-04
Loss = 2.0579e-03, PNorm = 155.4297, GNorm = 0.1360, lr_0 = 1.1618e-04
Loss = 9.4023e-04, PNorm = 155.4298, GNorm = 0.0276, lr_0 = 1.1611e-04
Loss = 1.1299e-03, PNorm = 155.4308, GNorm = 0.0346, lr_0 = 1.1603e-04
Loss = 1.2384e-03, PNorm = 155.4329, GNorm = 0.0629, lr_0 = 1.1595e-04
Loss = 7.2975e-04, PNorm = 155.4341, GNorm = 0.0952, lr_0 = 1.1587e-04
Loss = 1.4891e-03, PNorm = 155.4365, GNorm = 0.1347, lr_0 = 1.1579e-04
Loss = 5.8228e-04, PNorm = 155.4367, GNorm = 0.0976, lr_0 = 1.1571e-04
Loss = 1.1124e-03, PNorm = 155.4362, GNorm = 0.0622, lr_0 = 1.1563e-04
Loss = 1.2181e-03, PNorm = 155.4371, GNorm = 0.0778, lr_0 = 1.1555e-04
Loss = 1.1146e-03, PNorm = 155.4397, GNorm = 0.1140, lr_0 = 1.1547e-04
Loss = 3.9683e-03, PNorm = 155.4407, GNorm = 0.1011, lr_0 = 1.1539e-04
Loss = 1.4819e-03, PNorm = 155.4426, GNorm = 0.1181, lr_0 = 1.1531e-04
Loss = 1.0362e-03, PNorm = 155.4429, GNorm = 0.1134, lr_0 = 1.1523e-04
Loss = 1.1811e-03, PNorm = 155.4425, GNorm = 0.2106, lr_0 = 1.1515e-04
Loss = 1.9855e-03, PNorm = 155.4452, GNorm = 0.1560, lr_0 = 1.1508e-04
Loss = 1.0500e-03, PNorm = 155.4463, GNorm = 0.0443, lr_0 = 1.1500e-04
Loss = 2.5180e-03, PNorm = 155.4485, GNorm = 0.1233, lr_0 = 1.1492e-04
Loss = 1.0363e-03, PNorm = 155.4473, GNorm = 0.0896, lr_0 = 1.1484e-04
Loss = 2.2946e-03, PNorm = 155.4479, GNorm = 0.3802, lr_0 = 1.1476e-04
Loss = 5.9863e-04, PNorm = 155.4509, GNorm = 0.0921, lr_0 = 1.1468e-04
Loss = 6.0287e-04, PNorm = 155.4532, GNorm = 0.0774, lr_0 = 1.1460e-04
Loss = 1.2958e-03, PNorm = 155.4563, GNorm = 0.0723, lr_0 = 1.1452e-04
Loss = 4.8895e-04, PNorm = 155.4574, GNorm = 0.0305, lr_0 = 1.1445e-04
Loss = 1.9159e-03, PNorm = 155.4586, GNorm = 0.1338, lr_0 = 1.1437e-04
Loss = 1.2731e-03, PNorm = 155.4611, GNorm = 0.0967, lr_0 = 1.1429e-04
Loss = 1.0361e-03, PNorm = 155.4621, GNorm = 0.2315, lr_0 = 1.1421e-04
Loss = 1.4255e-03, PNorm = 155.4650, GNorm = 0.0392, lr_0 = 1.1413e-04
Loss = 4.7292e-04, PNorm = 155.4658, GNorm = 0.0865, lr_0 = 1.1405e-04
Loss = 6.8438e-04, PNorm = 155.4679, GNorm = 0.1608, lr_0 = 1.1398e-04
Loss = 7.1775e-04, PNorm = 155.4713, GNorm = 0.1305, lr_0 = 1.1390e-04
Loss = 2.6725e-03, PNorm = 155.4730, GNorm = 0.1014, lr_0 = 1.1382e-04
Loss = 9.7087e-04, PNorm = 155.4731, GNorm = 0.0330, lr_0 = 1.1374e-04
Loss = 1.3594e-03, PNorm = 155.4748, GNorm = 0.0436, lr_0 = 1.1366e-04
Loss = 7.6345e-04, PNorm = 155.4765, GNorm = 0.0483, lr_0 = 1.1359e-04
Loss = 9.2950e-04, PNorm = 155.4773, GNorm = 0.0843, lr_0 = 1.1351e-04
Loss = 4.8227e-04, PNorm = 155.4791, GNorm = 0.0504, lr_0 = 1.1343e-04
Loss = 6.8639e-04, PNorm = 155.4793, GNorm = 0.0782, lr_0 = 1.1335e-04
Loss = 5.8207e-04, PNorm = 155.4802, GNorm = 0.1076, lr_0 = 1.1328e-04
Loss = 1.4266e-03, PNorm = 155.4804, GNorm = 0.0687, lr_0 = 1.1320e-04
Loss = 4.6009e-04, PNorm = 155.4807, GNorm = 0.0428, lr_0 = 1.1312e-04
Loss = 5.3979e-04, PNorm = 155.4808, GNorm = 0.0168, lr_0 = 1.1304e-04
Loss = 1.2488e-03, PNorm = 155.4817, GNorm = 0.1417, lr_0 = 1.1297e-04
Loss = 2.0626e-03, PNorm = 155.4835, GNorm = 0.0514, lr_0 = 1.1289e-04
Loss = 9.2667e-04, PNorm = 155.4838, GNorm = 0.0635, lr_0 = 1.1281e-04
Loss = 6.7716e-04, PNorm = 155.4858, GNorm = 0.1098, lr_0 = 1.1273e-04
Loss = 1.3901e-03, PNorm = 155.4869, GNorm = 0.1711, lr_0 = 1.1266e-04
Loss = 1.4828e-03, PNorm = 155.4880, GNorm = 0.0931, lr_0 = 1.1258e-04
Loss = 6.2916e-04, PNorm = 155.4888, GNorm = 0.0467, lr_0 = 1.1250e-04
Loss = 7.2183e-04, PNorm = 155.4893, GNorm = 0.0497, lr_0 = 1.1243e-04
Loss = 8.5904e-04, PNorm = 155.4896, GNorm = 0.0521, lr_0 = 1.1235e-04
Loss = 5.2178e-04, PNorm = 155.4916, GNorm = 0.1080, lr_0 = 1.1227e-04
Loss = 5.2606e-04, PNorm = 155.4939, GNorm = 0.0459, lr_0 = 1.1219e-04
Loss = 3.1835e-03, PNorm = 155.4958, GNorm = 0.0786, lr_0 = 1.1212e-04
Loss = 5.6841e-04, PNorm = 155.4967, GNorm = 0.1185, lr_0 = 1.1204e-04
Loss = 2.3062e-03, PNorm = 155.4991, GNorm = 0.0843, lr_0 = 1.1196e-04
Loss = 1.0471e-03, PNorm = 155.5017, GNorm = 0.1681, lr_0 = 1.1189e-04
Loss = 4.4808e-04, PNorm = 155.5043, GNorm = 0.0545, lr_0 = 1.1181e-04
Loss = 1.2943e-03, PNorm = 155.5064, GNorm = 0.0404, lr_0 = 1.1173e-04
Loss = 1.1715e-03, PNorm = 155.5064, GNorm = 0.0627, lr_0 = 1.1166e-04
Loss = 5.9190e-04, PNorm = 155.5061, GNorm = 0.0855, lr_0 = 1.1158e-04
Loss = 1.2962e-03, PNorm = 155.5063, GNorm = 0.0376, lr_0 = 1.1150e-04
Loss = 1.1838e-03, PNorm = 155.5093, GNorm = 0.2695, lr_0 = 1.1143e-04
Loss = 7.3649e-04, PNorm = 155.5112, GNorm = 0.0724, lr_0 = 1.1135e-04
Loss = 9.3607e-04, PNorm = 155.5132, GNorm = 0.0905, lr_0 = 1.1128e-04
Loss = 1.9120e-03, PNorm = 155.5133, GNorm = 0.2273, lr_0 = 1.1120e-04
Loss = 6.2197e-04, PNorm = 155.5145, GNorm = 0.0495, lr_0 = 1.1112e-04
Loss = 1.3749e-03, PNorm = 155.5136, GNorm = 0.0985, lr_0 = 1.1105e-04
Loss = 6.2221e-04, PNorm = 155.5153, GNorm = 0.0938, lr_0 = 1.1097e-04
Loss = 8.1135e-04, PNorm = 155.5174, GNorm = 0.0857, lr_0 = 1.1089e-04
Loss = 1.4373e-03, PNorm = 155.5180, GNorm = 0.1502, lr_0 = 1.1082e-04
Loss = 1.8547e-03, PNorm = 155.5183, GNorm = 0.0223, lr_0 = 1.1074e-04
Loss = 1.8746e-03, PNorm = 155.5193, GNorm = 0.1473, lr_0 = 1.1067e-04
Loss = 8.7372e-04, PNorm = 155.5193, GNorm = 0.0838, lr_0 = 1.1059e-04
Loss = 5.0161e-04, PNorm = 155.5197, GNorm = 0.1382, lr_0 = 1.1052e-04
Loss = 1.4284e-03, PNorm = 155.5214, GNorm = 0.1488, lr_0 = 1.1044e-04
Loss = 1.4669e-03, PNorm = 155.5229, GNorm = 0.1145, lr_0 = 1.1036e-04
Loss = 1.0400e-03, PNorm = 155.5247, GNorm = 0.0634, lr_0 = 1.1029e-04
Loss = 5.8024e-04, PNorm = 155.5269, GNorm = 0.0728, lr_0 = 1.1021e-04
Loss = 7.9754e-04, PNorm = 155.5287, GNorm = 0.0993, lr_0 = 1.1014e-04
Loss = 1.1681e-03, PNorm = 155.5298, GNorm = 0.1005, lr_0 = 1.1006e-04
Loss = 7.8988e-04, PNorm = 155.5303, GNorm = 0.0376, lr_0 = 1.0999e-04
Loss = 6.6037e-04, PNorm = 155.5302, GNorm = 0.0160, lr_0 = 1.0991e-04
Loss = 1.5544e-03, PNorm = 155.5308, GNorm = 0.0981, lr_0 = 1.0984e-04
Loss = 1.3206e-03, PNorm = 155.5324, GNorm = 0.1268, lr_0 = 1.0976e-04
Loss = 4.9872e-04, PNorm = 155.5346, GNorm = 0.0438, lr_0 = 1.0969e-04
Loss = 1.0383e-03, PNorm = 155.5359, GNorm = 0.0966, lr_0 = 1.0961e-04
Loss = 1.1881e-03, PNorm = 155.5372, GNorm = 0.0819, lr_0 = 1.0954e-04
Loss = 1.6855e-03, PNorm = 155.5379, GNorm = 0.1986, lr_0 = 1.0946e-04
Loss = 8.9752e-04, PNorm = 155.5398, GNorm = 0.0719, lr_0 = 1.0939e-04
Loss = 2.0986e-03, PNorm = 155.5413, GNorm = 0.0683, lr_0 = 1.0931e-04
Loss = 4.4363e-04, PNorm = 155.5416, GNorm = 0.0570, lr_0 = 1.0924e-04
Loss = 2.1559e-03, PNorm = 155.5423, GNorm = 0.3099, lr_0 = 1.0916e-04
Loss = 5.2822e-04, PNorm = 155.5419, GNorm = 0.0574, lr_0 = 1.0909e-04
Loss = 3.4912e-03, PNorm = 155.5435, GNorm = 0.1434, lr_0 = 1.0901e-04
Loss = 1.7534e-03, PNorm = 155.5449, GNorm = 0.1491, lr_0 = 1.0894e-04
Loss = 1.3760e-03, PNorm = 155.5473, GNorm = 0.0928, lr_0 = 1.0886e-04
Loss = 1.8926e-03, PNorm = 155.5496, GNorm = 0.0825, lr_0 = 1.0879e-04
Loss = 1.2219e-03, PNorm = 155.5523, GNorm = 0.1191, lr_0 = 1.0871e-04
Loss = 2.2644e-03, PNorm = 155.5529, GNorm = 0.1982, lr_0 = 1.0864e-04
Loss = 1.4661e-03, PNorm = 155.5556, GNorm = 0.1283, lr_0 = 1.0856e-04
Validation mae = 0.474556
Epoch 29
Loss = 1.8382e-03, PNorm = 155.5573, GNorm = 0.0551, lr_0 = 1.0849e-04
Loss = 1.4329e-03, PNorm = 155.5584, GNorm = 0.0577, lr_0 = 1.0841e-04
Loss = 1.2739e-03, PNorm = 155.5617, GNorm = 0.0775, lr_0 = 1.0834e-04
Loss = 9.9230e-04, PNorm = 155.5629, GNorm = 0.0835, lr_0 = 1.0827e-04
Loss = 7.6942e-04, PNorm = 155.5638, GNorm = 0.1217, lr_0 = 1.0819e-04
Loss = 5.0607e-04, PNorm = 155.5648, GNorm = 0.0403, lr_0 = 1.0812e-04
Loss = 7.0631e-04, PNorm = 155.5660, GNorm = 0.0232, lr_0 = 1.0804e-04
Loss = 9.1367e-04, PNorm = 155.5673, GNorm = 0.0604, lr_0 = 1.0797e-04
Loss = 1.1306e-03, PNorm = 155.5681, GNorm = 0.0281, lr_0 = 1.0790e-04
Loss = 6.9418e-04, PNorm = 155.5711, GNorm = 0.1106, lr_0 = 1.0782e-04
Loss = 3.2969e-03, PNorm = 155.5733, GNorm = 0.1493, lr_0 = 1.0775e-04
Loss = 5.7236e-04, PNorm = 155.5746, GNorm = 0.0514, lr_0 = 1.0767e-04
Loss = 6.1987e-04, PNorm = 155.5754, GNorm = 0.0582, lr_0 = 1.0760e-04
Loss = 6.5514e-04, PNorm = 155.5768, GNorm = 0.0214, lr_0 = 1.0753e-04
Loss = 8.4813e-04, PNorm = 155.5779, GNorm = 0.0470, lr_0 = 1.0745e-04
Loss = 5.0509e-04, PNorm = 155.5798, GNorm = 0.0488, lr_0 = 1.0738e-04
Loss = 1.8125e-03, PNorm = 155.5810, GNorm = 0.0645, lr_0 = 1.0731e-04
Loss = 5.0013e-04, PNorm = 155.5826, GNorm = 0.0858, lr_0 = 1.0723e-04
Loss = 5.0889e-04, PNorm = 155.5834, GNorm = 0.0530, lr_0 = 1.0716e-04
Loss = 8.4902e-04, PNorm = 155.5845, GNorm = 0.1385, lr_0 = 1.0709e-04
Loss = 1.2262e-03, PNorm = 155.5847, GNorm = 0.1248, lr_0 = 1.0701e-04
Loss = 5.2428e-04, PNorm = 155.5840, GNorm = 0.0631, lr_0 = 1.0694e-04
Loss = 7.9014e-04, PNorm = 155.5835, GNorm = 0.1262, lr_0 = 1.0687e-04
Loss = 4.4398e-04, PNorm = 155.5823, GNorm = 0.0991, lr_0 = 1.0679e-04
Loss = 4.0191e-04, PNorm = 155.5829, GNorm = 0.0250, lr_0 = 1.0672e-04
Loss = 1.2862e-03, PNorm = 155.5852, GNorm = 0.0413, lr_0 = 1.0665e-04
Loss = 5.5716e-04, PNorm = 155.5858, GNorm = 0.0949, lr_0 = 1.0657e-04
Loss = 4.7095e-04, PNorm = 155.5863, GNorm = 0.0923, lr_0 = 1.0650e-04
Loss = 6.0090e-04, PNorm = 155.5872, GNorm = 0.0266, lr_0 = 1.0643e-04
Loss = 1.0016e-03, PNorm = 155.5878, GNorm = 0.0733, lr_0 = 1.0635e-04
Loss = 5.1984e-04, PNorm = 155.5887, GNorm = 0.1008, lr_0 = 1.0628e-04
Loss = 1.4940e-03, PNorm = 155.5887, GNorm = 0.0314, lr_0 = 1.0621e-04
Loss = 1.8571e-03, PNorm = 155.5898, GNorm = 0.0424, lr_0 = 1.0614e-04
Loss = 9.6305e-04, PNorm = 155.5916, GNorm = 0.0418, lr_0 = 1.0606e-04
Loss = 4.2303e-04, PNorm = 155.5931, GNorm = 0.0614, lr_0 = 1.0599e-04
Loss = 1.6347e-03, PNorm = 155.5937, GNorm = 0.1215, lr_0 = 1.0592e-04
Loss = 1.5028e-03, PNorm = 155.5942, GNorm = 0.0615, lr_0 = 1.0585e-04
Loss = 9.5350e-04, PNorm = 155.5961, GNorm = 0.0454, lr_0 = 1.0577e-04
Loss = 1.5712e-03, PNorm = 155.5973, GNorm = 0.0551, lr_0 = 1.0570e-04
Loss = 7.5628e-04, PNorm = 155.5983, GNorm = 0.0652, lr_0 = 1.0563e-04
Loss = 1.1119e-03, PNorm = 155.5986, GNorm = 0.0910, lr_0 = 1.0556e-04
Loss = 6.9419e-04, PNorm = 155.6002, GNorm = 0.0268, lr_0 = 1.0548e-04
Loss = 1.0078e-03, PNorm = 155.6013, GNorm = 0.0301, lr_0 = 1.0541e-04
Loss = 1.6809e-03, PNorm = 155.6018, GNorm = 0.2869, lr_0 = 1.0534e-04
Loss = 1.8559e-03, PNorm = 155.6043, GNorm = 0.0667, lr_0 = 1.0527e-04
Loss = 1.1857e-03, PNorm = 155.6064, GNorm = 0.0489, lr_0 = 1.0519e-04
Loss = 4.9032e-04, PNorm = 155.6067, GNorm = 0.0787, lr_0 = 1.0512e-04
Loss = 2.6181e-03, PNorm = 155.6072, GNorm = 0.0744, lr_0 = 1.0505e-04
Loss = 5.7987e-04, PNorm = 155.6068, GNorm = 0.0982, lr_0 = 1.0498e-04
Loss = 1.5956e-03, PNorm = 155.6075, GNorm = 0.0798, lr_0 = 1.0491e-04
Loss = 1.2845e-03, PNorm = 155.6078, GNorm = 0.4878, lr_0 = 1.0483e-04
Loss = 1.2200e-03, PNorm = 155.6103, GNorm = 0.0790, lr_0 = 1.0476e-04
Loss = 1.5398e-03, PNorm = 155.6118, GNorm = 0.0515, lr_0 = 1.0469e-04
Loss = 1.3278e-03, PNorm = 155.6125, GNorm = 0.0441, lr_0 = 1.0462e-04
Loss = 3.3041e-03, PNorm = 155.6117, GNorm = 0.0328, lr_0 = 1.0455e-04
Loss = 6.1307e-04, PNorm = 155.6116, GNorm = 0.0585, lr_0 = 1.0448e-04
Loss = 1.7226e-03, PNorm = 155.6124, GNorm = 0.1655, lr_0 = 1.0440e-04
Loss = 1.3887e-03, PNorm = 155.6136, GNorm = 0.0590, lr_0 = 1.0433e-04
Loss = 4.8862e-04, PNorm = 155.6145, GNorm = 0.0595, lr_0 = 1.0426e-04
Loss = 5.3413e-04, PNorm = 155.6157, GNorm = 0.1586, lr_0 = 1.0419e-04
Loss = 4.2593e-04, PNorm = 155.6176, GNorm = 0.0506, lr_0 = 1.0412e-04
Loss = 1.4702e-03, PNorm = 155.6190, GNorm = 0.1320, lr_0 = 1.0405e-04
Loss = 1.7451e-03, PNorm = 155.6189, GNorm = 0.0976, lr_0 = 1.0398e-04
Loss = 4.7530e-03, PNorm = 155.6190, GNorm = 0.1268, lr_0 = 1.0391e-04
Loss = 1.5905e-03, PNorm = 155.6217, GNorm = 0.0294, lr_0 = 1.0383e-04
Loss = 7.4544e-04, PNorm = 155.6230, GNorm = 0.0974, lr_0 = 1.0376e-04
Loss = 4.5137e-04, PNorm = 155.6246, GNorm = 0.1158, lr_0 = 1.0369e-04
Loss = 5.1151e-04, PNorm = 155.6257, GNorm = 0.1726, lr_0 = 1.0362e-04
Loss = 6.0609e-04, PNorm = 155.6270, GNorm = 0.0741, lr_0 = 1.0355e-04
Loss = 9.6457e-04, PNorm = 155.6277, GNorm = 0.0540, lr_0 = 1.0348e-04
Loss = 4.3582e-04, PNorm = 155.6280, GNorm = 0.0910, lr_0 = 1.0341e-04
Loss = 4.0054e-04, PNorm = 155.6283, GNorm = 0.0393, lr_0 = 1.0334e-04
Loss = 4.8424e-04, PNorm = 155.6292, GNorm = 0.0242, lr_0 = 1.0327e-04
Loss = 9.8934e-04, PNorm = 155.6312, GNorm = 0.0346, lr_0 = 1.0320e-04
Loss = 1.8556e-03, PNorm = 155.6329, GNorm = 0.0590, lr_0 = 1.0312e-04
Loss = 6.5096e-04, PNorm = 155.6339, GNorm = 0.0405, lr_0 = 1.0305e-04
Loss = 9.0708e-04, PNorm = 155.6354, GNorm = 0.0894, lr_0 = 1.0298e-04
Loss = 5.2196e-04, PNorm = 155.6363, GNorm = 0.0238, lr_0 = 1.0291e-04
Loss = 9.1528e-04, PNorm = 155.6364, GNorm = 0.1504, lr_0 = 1.0284e-04
Loss = 3.2898e-03, PNorm = 155.6369, GNorm = 0.2124, lr_0 = 1.0277e-04
Loss = 9.4823e-04, PNorm = 155.6380, GNorm = 0.1053, lr_0 = 1.0270e-04
Loss = 6.2004e-04, PNorm = 155.6403, GNorm = 0.1635, lr_0 = 1.0263e-04
Loss = 9.7264e-04, PNorm = 155.6439, GNorm = 0.0177, lr_0 = 1.0256e-04
Loss = 6.1300e-04, PNorm = 155.6465, GNorm = 0.0261, lr_0 = 1.0249e-04
Loss = 6.4576e-04, PNorm = 155.6488, GNorm = 0.0483, lr_0 = 1.0242e-04
Loss = 1.0841e-03, PNorm = 155.6501, GNorm = 0.0391, lr_0 = 1.0235e-04
Loss = 1.0567e-03, PNorm = 155.6504, GNorm = 0.0702, lr_0 = 1.0228e-04
Loss = 5.2319e-04, PNorm = 155.6522, GNorm = 0.1160, lr_0 = 1.0221e-04
Loss = 4.0253e-04, PNorm = 155.6535, GNorm = 0.0781, lr_0 = 1.0214e-04
Loss = 2.5209e-03, PNorm = 155.6548, GNorm = 0.1006, lr_0 = 1.0207e-04
Loss = 1.1503e-03, PNorm = 155.6560, GNorm = 0.1404, lr_0 = 1.0200e-04
Loss = 7.1767e-04, PNorm = 155.6586, GNorm = 0.0566, lr_0 = 1.0193e-04
Loss = 2.0788e-03, PNorm = 155.6579, GNorm = 0.0622, lr_0 = 1.0186e-04
Loss = 7.7954e-04, PNorm = 155.6596, GNorm = 0.1889, lr_0 = 1.0179e-04
Loss = 6.8833e-04, PNorm = 155.6614, GNorm = 0.1617, lr_0 = 1.0172e-04
Loss = 1.1552e-03, PNorm = 155.6645, GNorm = 0.0534, lr_0 = 1.0165e-04
Loss = 1.5186e-03, PNorm = 155.6652, GNorm = 0.0679, lr_0 = 1.0158e-04
Loss = 4.5490e-04, PNorm = 155.6670, GNorm = 0.1666, lr_0 = 1.0151e-04
Loss = 8.2086e-04, PNorm = 155.6686, GNorm = 0.1626, lr_0 = 1.0144e-04
Loss = 8.3175e-04, PNorm = 155.6704, GNorm = 0.0480, lr_0 = 1.0137e-04
Loss = 3.5597e-03, PNorm = 155.6712, GNorm = 0.0343, lr_0 = 1.0130e-04
Loss = 5.4470e-04, PNorm = 155.6714, GNorm = 0.0969, lr_0 = 1.0123e-04
Loss = 8.5610e-04, PNorm = 155.6731, GNorm = 0.0354, lr_0 = 1.0116e-04
Loss = 1.0366e-03, PNorm = 155.6733, GNorm = 0.1706, lr_0 = 1.0110e-04
Loss = 1.2281e-03, PNorm = 155.6741, GNorm = 0.0591, lr_0 = 1.0103e-04
Loss = 7.0898e-04, PNorm = 155.6757, GNorm = 0.0257, lr_0 = 1.0096e-04
Loss = 4.4983e-04, PNorm = 155.6757, GNorm = 0.0468, lr_0 = 1.0089e-04
Loss = 1.4815e-03, PNorm = 155.6757, GNorm = 0.0757, lr_0 = 1.0082e-04
Loss = 1.4918e-03, PNorm = 155.6742, GNorm = 0.0771, lr_0 = 1.0075e-04
Loss = 1.7073e-03, PNorm = 155.6752, GNorm = 0.0389, lr_0 = 1.0068e-04
Loss = 7.8139e-04, PNorm = 155.6746, GNorm = 0.1166, lr_0 = 1.0061e-04
Loss = 1.9905e-03, PNorm = 155.6756, GNorm = 0.1150, lr_0 = 1.0054e-04
Loss = 9.5531e-04, PNorm = 155.6772, GNorm = 0.1025, lr_0 = 1.0047e-04
Loss = 7.7399e-04, PNorm = 155.6788, GNorm = 0.0261, lr_0 = 1.0041e-04
Loss = 4.7603e-04, PNorm = 155.6793, GNorm = 0.0716, lr_0 = 1.0034e-04
Loss = 8.3001e-04, PNorm = 155.6801, GNorm = 0.0685, lr_0 = 1.0027e-04
Loss = 1.5982e-03, PNorm = 155.6824, GNorm = 0.1816, lr_0 = 1.0020e-04
Loss = 5.0593e-04, PNorm = 155.6845, GNorm = 0.0784, lr_0 = 1.0013e-04
Loss = 1.5555e-03, PNorm = 155.6867, GNorm = 0.1038, lr_0 = 1.0006e-04
Loss = 7.4139e-04, PNorm = 155.6882, GNorm = 0.0715, lr_0 = 1.0000e-04
Validation mae = 0.475718
Model 0 best validation mae = 0.474556 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.455806
Ensemble test mae = 0.455806
Fold 4
Splitting data with seed 4
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 8.3098e-01, PNorm = 62.2439, GNorm = 2.2794, lr_0 = 1.0413e-04
Loss = 7.4080e-01, PNorm = 62.2550, GNorm = 1.6521, lr_0 = 1.0788e-04
Loss = 5.8070e-01, PNorm = 62.2671, GNorm = 2.1906, lr_0 = 1.1163e-04
Loss = 4.8659e-01, PNorm = 62.2789, GNorm = 1.6780, lr_0 = 1.1537e-04
Loss = 4.8034e-01, PNorm = 62.2898, GNorm = 2.3460, lr_0 = 1.1913e-04
Loss = 4.2355e-01, PNorm = 62.2983, GNorm = 2.3883, lr_0 = 1.2287e-04
Loss = 4.2712e-01, PNorm = 62.3068, GNorm = 2.0058, lr_0 = 1.2663e-04
Loss = 3.9737e-01, PNorm = 62.3169, GNorm = 2.2567, lr_0 = 1.3038e-04
Loss = 4.0719e-01, PNorm = 62.3278, GNorm = 2.7476, lr_0 = 1.3413e-04
Loss = 3.8305e-01, PNorm = 62.3377, GNorm = 1.6443, lr_0 = 1.3788e-04
Loss = 3.5809e-01, PNorm = 62.3488, GNorm = 1.8637, lr_0 = 1.4163e-04
Loss = 3.7541e-01, PNorm = 62.3588, GNorm = 2.8494, lr_0 = 1.4537e-04
Loss = 3.3029e-01, PNorm = 62.3684, GNorm = 1.5853, lr_0 = 1.4913e-04
Loss = 2.9917e-01, PNorm = 62.3792, GNorm = 1.6772, lr_0 = 1.5288e-04
Loss = 2.9572e-01, PNorm = 62.3896, GNorm = 1.6826, lr_0 = 1.5662e-04
Loss = 2.9095e-01, PNorm = 62.3986, GNorm = 2.1780, lr_0 = 1.6038e-04
Loss = 3.1478e-01, PNorm = 62.4091, GNorm = 1.8309, lr_0 = 1.6412e-04
Loss = 3.1858e-01, PNorm = 62.4210, GNorm = 1.8919, lr_0 = 1.6788e-04
Loss = 3.0622e-01, PNorm = 62.4325, GNorm = 1.7979, lr_0 = 1.7163e-04
Loss = 3.1709e-01, PNorm = 62.4446, GNorm = 2.0840, lr_0 = 1.7538e-04
Loss = 3.2232e-01, PNorm = 62.4549, GNorm = 2.6891, lr_0 = 1.7913e-04
Loss = 3.2810e-01, PNorm = 62.4687, GNorm = 1.6831, lr_0 = 1.8288e-04
Loss = 3.1096e-01, PNorm = 62.4824, GNorm = 2.7564, lr_0 = 1.8662e-04
Loss = 3.3478e-01, PNorm = 62.4931, GNorm = 1.4516, lr_0 = 1.9038e-04
Loss = 2.8734e-01, PNorm = 62.5072, GNorm = 1.5601, lr_0 = 1.9413e-04
Loss = 2.7755e-01, PNorm = 62.5217, GNorm = 1.5639, lr_0 = 1.9788e-04
Loss = 2.8734e-01, PNorm = 62.5343, GNorm = 1.8318, lr_0 = 2.0163e-04
Loss = 2.5750e-01, PNorm = 62.5511, GNorm = 2.1958, lr_0 = 2.0537e-04
Loss = 2.2734e-01, PNorm = 62.5636, GNorm = 1.1383, lr_0 = 2.0913e-04
Loss = 2.8401e-01, PNorm = 62.5756, GNorm = 1.8851, lr_0 = 2.1288e-04
Loss = 2.7615e-01, PNorm = 62.5916, GNorm = 1.3725, lr_0 = 2.1663e-04
Loss = 2.8816e-01, PNorm = 62.6051, GNorm = 2.2921, lr_0 = 2.2038e-04
Loss = 3.0798e-01, PNorm = 62.6222, GNorm = 1.4396, lr_0 = 2.2412e-04
Loss = 2.7416e-01, PNorm = 62.6419, GNorm = 1.5322, lr_0 = 2.2787e-04
Loss = 3.1449e-01, PNorm = 62.6595, GNorm = 1.5789, lr_0 = 2.3163e-04
Loss = 3.0068e-01, PNorm = 62.6764, GNorm = 2.0150, lr_0 = 2.3538e-04
Loss = 2.9702e-01, PNorm = 62.6964, GNorm = 1.7119, lr_0 = 2.3913e-04
Loss = 3.2126e-01, PNorm = 62.7172, GNorm = 1.9392, lr_0 = 2.4288e-04
Loss = 2.6307e-01, PNorm = 62.7348, GNorm = 2.1044, lr_0 = 2.4662e-04
Loss = 2.9549e-01, PNorm = 62.7546, GNorm = 1.8999, lr_0 = 2.5038e-04
Loss = 2.9392e-01, PNorm = 62.7762, GNorm = 1.6989, lr_0 = 2.5413e-04
Loss = 2.5715e-01, PNorm = 62.7967, GNorm = 1.4482, lr_0 = 2.5788e-04
Loss = 2.5938e-01, PNorm = 62.8130, GNorm = 1.5574, lr_0 = 2.6163e-04
Loss = 2.9208e-01, PNorm = 62.8325, GNorm = 1.4134, lr_0 = 2.6537e-04
Loss = 2.3990e-01, PNorm = 62.8540, GNorm = 1.3518, lr_0 = 2.6912e-04
Loss = 2.4349e-01, PNorm = 62.8761, GNorm = 1.5921, lr_0 = 2.7288e-04
Loss = 2.4971e-01, PNorm = 62.8944, GNorm = 1.7350, lr_0 = 2.7663e-04
Loss = 2.6798e-01, PNorm = 62.9151, GNorm = 1.7988, lr_0 = 2.8038e-04
Loss = 2.4046e-01, PNorm = 62.9374, GNorm = 1.0476, lr_0 = 2.8413e-04
Loss = 2.5413e-01, PNorm = 62.9585, GNorm = 1.4495, lr_0 = 2.8787e-04
Loss = 2.3711e-01, PNorm = 62.9846, GNorm = 1.4908, lr_0 = 2.9163e-04
Loss = 2.5537e-01, PNorm = 63.0106, GNorm = 1.4873, lr_0 = 2.9538e-04
Loss = 2.8488e-01, PNorm = 63.0356, GNorm = 1.1606, lr_0 = 2.9913e-04
Loss = 2.4731e-01, PNorm = 63.0630, GNorm = 1.2371, lr_0 = 3.0288e-04
Loss = 2.6861e-01, PNorm = 63.0889, GNorm = 1.1950, lr_0 = 3.0662e-04
Loss = 2.3308e-01, PNorm = 63.1155, GNorm = 1.1940, lr_0 = 3.1037e-04
Loss = 2.8071e-01, PNorm = 63.1387, GNorm = 2.0201, lr_0 = 3.1413e-04
Loss = 2.2888e-01, PNorm = 63.1656, GNorm = 1.3502, lr_0 = 3.1788e-04
Loss = 2.4053e-01, PNorm = 63.1950, GNorm = 1.6376, lr_0 = 3.2163e-04
Loss = 2.8235e-01, PNorm = 63.2195, GNorm = 2.1005, lr_0 = 3.2538e-04
Loss = 2.7034e-01, PNorm = 63.2526, GNorm = 1.6114, lr_0 = 3.2912e-04
Loss = 2.6634e-01, PNorm = 63.2830, GNorm = 0.9190, lr_0 = 3.3288e-04
Loss = 2.6183e-01, PNorm = 63.3189, GNorm = 0.9992, lr_0 = 3.3663e-04
Loss = 2.3979e-01, PNorm = 63.3414, GNorm = 1.2535, lr_0 = 3.4038e-04
Loss = 2.4456e-01, PNorm = 63.3678, GNorm = 1.7465, lr_0 = 3.4413e-04
Loss = 2.2123e-01, PNorm = 63.3986, GNorm = 1.7569, lr_0 = 3.4787e-04
Loss = 2.7386e-01, PNorm = 63.4238, GNorm = 1.6295, lr_0 = 3.5162e-04
Loss = 2.1749e-01, PNorm = 63.4559, GNorm = 1.1100, lr_0 = 3.5538e-04
Loss = 2.4747e-01, PNorm = 63.4854, GNorm = 0.9213, lr_0 = 3.5913e-04
Loss = 2.3271e-01, PNorm = 63.5111, GNorm = 1.4139, lr_0 = 3.6288e-04
Loss = 2.2824e-01, PNorm = 63.5407, GNorm = 1.2258, lr_0 = 3.6662e-04
Loss = 2.3652e-01, PNorm = 63.5678, GNorm = 1.0989, lr_0 = 3.7037e-04
Loss = 2.5262e-01, PNorm = 63.6017, GNorm = 2.1287, lr_0 = 3.7413e-04
Loss = 2.5455e-01, PNorm = 63.6342, GNorm = 1.2883, lr_0 = 3.7788e-04
Loss = 2.4697e-01, PNorm = 63.6707, GNorm = 1.2195, lr_0 = 3.8163e-04
Loss = 2.4303e-01, PNorm = 63.7044, GNorm = 0.9585, lr_0 = 3.8537e-04
Loss = 2.4831e-01, PNorm = 63.7368, GNorm = 1.0067, lr_0 = 3.8912e-04
Loss = 2.2139e-01, PNorm = 63.7733, GNorm = 1.0075, lr_0 = 3.9287e-04
Loss = 2.5483e-01, PNorm = 63.8022, GNorm = 1.1186, lr_0 = 3.9663e-04
Loss = 2.5592e-01, PNorm = 63.8407, GNorm = 1.1608, lr_0 = 4.0038e-04
Loss = 2.3034e-01, PNorm = 63.8772, GNorm = 1.2317, lr_0 = 4.0413e-04
Loss = 2.7192e-01, PNorm = 63.9165, GNorm = 1.2124, lr_0 = 4.0787e-04
Loss = 2.8616e-01, PNorm = 63.9579, GNorm = 0.8122, lr_0 = 4.1162e-04
Loss = 2.3918e-01, PNorm = 64.0006, GNorm = 1.1470, lr_0 = 4.1537e-04
Loss = 2.3935e-01, PNorm = 64.0360, GNorm = 1.1619, lr_0 = 4.1913e-04
Loss = 2.3611e-01, PNorm = 64.0850, GNorm = 1.2837, lr_0 = 4.2288e-04
Loss = 2.1027e-01, PNorm = 64.1201, GNorm = 1.1894, lr_0 = 4.2662e-04
Loss = 2.2468e-01, PNorm = 64.1612, GNorm = 1.1279, lr_0 = 4.3037e-04
Loss = 2.1104e-01, PNorm = 64.2032, GNorm = 1.1492, lr_0 = 4.3412e-04
Loss = 2.0574e-01, PNorm = 64.2391, GNorm = 0.8999, lr_0 = 4.3788e-04
Loss = 2.2095e-01, PNorm = 64.2772, GNorm = 1.0743, lr_0 = 4.4163e-04
Loss = 2.2835e-01, PNorm = 64.3202, GNorm = 1.7514, lr_0 = 4.4538e-04
Loss = 2.3360e-01, PNorm = 64.3579, GNorm = 0.9818, lr_0 = 4.4912e-04
Loss = 2.4061e-01, PNorm = 64.4058, GNorm = 1.1380, lr_0 = 4.5287e-04
Loss = 2.3098e-01, PNorm = 64.4440, GNorm = 0.8636, lr_0 = 4.5662e-04
Loss = 2.4276e-01, PNorm = 64.4904, GNorm = 1.1288, lr_0 = 4.6038e-04
Loss = 2.3507e-01, PNorm = 64.5325, GNorm = 1.1813, lr_0 = 4.6413e-04
Loss = 2.0654e-01, PNorm = 64.5772, GNorm = 0.8536, lr_0 = 4.6787e-04
Loss = 2.6941e-01, PNorm = 64.6258, GNorm = 1.1295, lr_0 = 4.7162e-04
Loss = 2.1535e-01, PNorm = 64.6760, GNorm = 0.9848, lr_0 = 4.7537e-04
Loss = 2.0943e-01, PNorm = 64.7227, GNorm = 1.0509, lr_0 = 4.7913e-04
Loss = 2.4713e-01, PNorm = 64.7731, GNorm = 1.1149, lr_0 = 4.8288e-04
Loss = 2.3463e-01, PNorm = 64.8205, GNorm = 1.4369, lr_0 = 4.8663e-04
Loss = 2.4478e-01, PNorm = 64.8785, GNorm = 0.9494, lr_0 = 4.9038e-04
Loss = 2.1541e-01, PNorm = 64.9255, GNorm = 1.0979, lr_0 = 4.9412e-04
Loss = 2.2645e-01, PNorm = 64.9846, GNorm = 1.3170, lr_0 = 4.9788e-04
Loss = 2.0738e-01, PNorm = 65.0309, GNorm = 0.6551, lr_0 = 5.0163e-04
Loss = 1.9895e-01, PNorm = 65.0822, GNorm = 1.0706, lr_0 = 5.0538e-04
Loss = 2.4179e-01, PNorm = 65.1304, GNorm = 1.0348, lr_0 = 5.0913e-04
Loss = 2.1345e-01, PNorm = 65.1768, GNorm = 0.8365, lr_0 = 5.1287e-04
Loss = 2.2433e-01, PNorm = 65.2266, GNorm = 1.0169, lr_0 = 5.1663e-04
Loss = 2.1473e-01, PNorm = 65.2707, GNorm = 1.0632, lr_0 = 5.2038e-04
Loss = 2.1984e-01, PNorm = 65.3189, GNorm = 1.0958, lr_0 = 5.2413e-04
Loss = 2.2992e-01, PNorm = 65.3732, GNorm = 0.8644, lr_0 = 5.2788e-04
Loss = 2.4367e-01, PNorm = 65.4260, GNorm = 1.4944, lr_0 = 5.3162e-04
Loss = 2.0188e-01, PNorm = 65.4812, GNorm = 0.8089, lr_0 = 5.3538e-04
Loss = 2.1645e-01, PNorm = 65.5378, GNorm = 1.0254, lr_0 = 5.3912e-04
Loss = 2.0251e-01, PNorm = 65.5887, GNorm = 0.8221, lr_0 = 5.4288e-04
Loss = 2.0857e-01, PNorm = 65.6439, GNorm = 0.9532, lr_0 = 5.4663e-04
Loss = 2.3543e-01, PNorm = 65.6991, GNorm = 0.9601, lr_0 = 5.5038e-04
Validation mae = 0.551116
Epoch 1
Loss = 1.7155e-01, PNorm = 65.7503, GNorm = 1.0012, lr_0 = 5.5413e-04
Loss = 1.4413e-01, PNorm = 65.8058, GNorm = 0.4402, lr_0 = 5.5787e-04
Loss = 1.5716e-01, PNorm = 65.8541, GNorm = 0.9630, lr_0 = 5.6163e-04
Loss = 1.4261e-01, PNorm = 65.9068, GNorm = 1.0776, lr_0 = 5.6538e-04
Loss = 1.2353e-01, PNorm = 65.9511, GNorm = 0.5357, lr_0 = 5.6913e-04
Loss = 1.3266e-01, PNorm = 65.9981, GNorm = 0.6899, lr_0 = 5.7288e-04
Loss = 1.6418e-01, PNorm = 66.0465, GNorm = 0.9307, lr_0 = 5.7662e-04
Loss = 1.4038e-01, PNorm = 66.1019, GNorm = 0.6453, lr_0 = 5.8038e-04
Loss = 1.3354e-01, PNorm = 66.1552, GNorm = 0.8093, lr_0 = 5.8413e-04
Loss = 1.4191e-01, PNorm = 66.2034, GNorm = 0.8920, lr_0 = 5.8788e-04
Loss = 1.4288e-01, PNorm = 66.2615, GNorm = 0.9290, lr_0 = 5.9163e-04
Loss = 1.5015e-01, PNorm = 66.3225, GNorm = 0.7523, lr_0 = 5.9538e-04
Loss = 1.7473e-01, PNorm = 66.3844, GNorm = 0.7457, lr_0 = 5.9913e-04
Loss = 1.4642e-01, PNorm = 66.4354, GNorm = 0.7354, lr_0 = 6.0288e-04
Loss = 1.2822e-01, PNorm = 66.4989, GNorm = 0.7952, lr_0 = 6.0663e-04
Loss = 1.7608e-01, PNorm = 66.5523, GNorm = 0.8662, lr_0 = 6.1038e-04
Loss = 1.4306e-01, PNorm = 66.6190, GNorm = 0.8586, lr_0 = 6.1413e-04
Loss = 1.4797e-01, PNorm = 66.6839, GNorm = 0.9017, lr_0 = 6.1788e-04
Loss = 1.4113e-01, PNorm = 66.7461, GNorm = 0.7692, lr_0 = 6.2163e-04
Loss = 1.6115e-01, PNorm = 66.8099, GNorm = 0.7438, lr_0 = 6.2538e-04
Loss = 1.4289e-01, PNorm = 66.8783, GNorm = 0.7326, lr_0 = 6.2913e-04
Loss = 1.5676e-01, PNorm = 66.9446, GNorm = 0.8518, lr_0 = 6.3288e-04
Loss = 1.3688e-01, PNorm = 67.0149, GNorm = 1.0049, lr_0 = 6.3663e-04
Loss = 1.4788e-01, PNorm = 67.0757, GNorm = 0.8060, lr_0 = 6.4038e-04
Loss = 1.6772e-01, PNorm = 67.1510, GNorm = 0.9859, lr_0 = 6.4413e-04
Loss = 1.7364e-01, PNorm = 67.2262, GNorm = 0.7011, lr_0 = 6.4788e-04
Loss = 1.3845e-01, PNorm = 67.3068, GNorm = 0.7448, lr_0 = 6.5163e-04
Loss = 1.4155e-01, PNorm = 67.3821, GNorm = 0.8188, lr_0 = 6.5538e-04
Loss = 1.6494e-01, PNorm = 67.4656, GNorm = 1.1837, lr_0 = 6.5913e-04
Loss = 1.3951e-01, PNorm = 67.5521, GNorm = 0.6597, lr_0 = 6.6288e-04
Loss = 1.6348e-01, PNorm = 67.6315, GNorm = 0.6904, lr_0 = 6.6663e-04
Loss = 1.5438e-01, PNorm = 67.7071, GNorm = 0.6352, lr_0 = 6.7038e-04
Loss = 1.6746e-01, PNorm = 67.7931, GNorm = 1.1226, lr_0 = 6.7413e-04
Loss = 1.4775e-01, PNorm = 67.8769, GNorm = 0.7968, lr_0 = 6.7788e-04
Loss = 1.7670e-01, PNorm = 67.9566, GNorm = 0.8752, lr_0 = 6.8163e-04
Loss = 1.5876e-01, PNorm = 68.0443, GNorm = 1.4769, lr_0 = 6.8538e-04
Loss = 1.6813e-01, PNorm = 68.1297, GNorm = 0.5391, lr_0 = 6.8913e-04
Loss = 1.6346e-01, PNorm = 68.2197, GNorm = 0.8698, lr_0 = 6.9288e-04
Loss = 1.5199e-01, PNorm = 68.3195, GNorm = 0.8993, lr_0 = 6.9663e-04
Loss = 1.4979e-01, PNorm = 68.3923, GNorm = 0.9128, lr_0 = 7.0038e-04
Loss = 1.6170e-01, PNorm = 68.4870, GNorm = 1.0749, lr_0 = 7.0413e-04
Loss = 1.8558e-01, PNorm = 68.5724, GNorm = 0.8784, lr_0 = 7.0788e-04
Loss = 1.5904e-01, PNorm = 68.6676, GNorm = 0.8186, lr_0 = 7.1163e-04
Loss = 1.5453e-01, PNorm = 68.7592, GNorm = 0.8879, lr_0 = 7.1538e-04
Loss = 1.3138e-01, PNorm = 68.8452, GNorm = 0.8743, lr_0 = 7.1913e-04
Loss = 1.5853e-01, PNorm = 68.9267, GNorm = 0.8404, lr_0 = 7.2288e-04
Loss = 1.8136e-01, PNorm = 69.0207, GNorm = 0.9634, lr_0 = 7.2663e-04
Loss = 1.8392e-01, PNorm = 69.1244, GNorm = 0.8498, lr_0 = 7.3038e-04
Loss = 1.6034e-01, PNorm = 69.2302, GNorm = 0.8421, lr_0 = 7.3413e-04
Loss = 1.6262e-01, PNorm = 69.3392, GNorm = 1.0711, lr_0 = 7.3788e-04
Loss = 1.9419e-01, PNorm = 69.4450, GNorm = 0.9039, lr_0 = 7.4163e-04
Loss = 1.6589e-01, PNorm = 69.5513, GNorm = 1.2603, lr_0 = 7.4538e-04
Loss = 1.7301e-01, PNorm = 69.6451, GNorm = 0.7980, lr_0 = 7.4913e-04
Loss = 1.4950e-01, PNorm = 69.7372, GNorm = 0.7390, lr_0 = 7.5288e-04
Loss = 1.6951e-01, PNorm = 69.8208, GNorm = 0.7750, lr_0 = 7.5663e-04
Loss = 1.8030e-01, PNorm = 69.9312, GNorm = 0.7495, lr_0 = 7.6038e-04
Loss = 1.6197e-01, PNorm = 70.0287, GNorm = 0.7713, lr_0 = 7.6413e-04
Loss = 1.4284e-01, PNorm = 70.1250, GNorm = 0.7995, lr_0 = 7.6788e-04
Loss = 1.7530e-01, PNorm = 70.2106, GNorm = 0.8029, lr_0 = 7.7163e-04
Loss = 1.7624e-01, PNorm = 70.3158, GNorm = 0.9347, lr_0 = 7.7538e-04
Loss = 1.7969e-01, PNorm = 70.4172, GNorm = 0.7800, lr_0 = 7.7913e-04
Loss = 1.7884e-01, PNorm = 70.5265, GNorm = 0.7598, lr_0 = 7.8288e-04
Loss = 1.5163e-01, PNorm = 70.6352, GNorm = 1.3088, lr_0 = 7.8663e-04
Loss = 1.7412e-01, PNorm = 70.7498, GNorm = 0.6708, lr_0 = 7.9038e-04
Loss = 1.4645e-01, PNorm = 70.8523, GNorm = 0.8044, lr_0 = 7.9413e-04
Loss = 1.7274e-01, PNorm = 70.9527, GNorm = 0.7498, lr_0 = 7.9788e-04
Loss = 1.6876e-01, PNorm = 71.0664, GNorm = 1.0188, lr_0 = 8.0163e-04
Loss = 1.6355e-01, PNorm = 71.1760, GNorm = 0.8998, lr_0 = 8.0538e-04
Loss = 1.7076e-01, PNorm = 71.2827, GNorm = 1.1325, lr_0 = 8.0913e-04
Loss = 1.8694e-01, PNorm = 71.3838, GNorm = 0.7526, lr_0 = 8.1288e-04
Loss = 1.8004e-01, PNorm = 71.4933, GNorm = 0.7456, lr_0 = 8.1663e-04
Loss = 1.7399e-01, PNorm = 71.6023, GNorm = 1.0416, lr_0 = 8.2038e-04
Loss = 1.7563e-01, PNorm = 71.7205, GNorm = 0.8447, lr_0 = 8.2413e-04
Loss = 1.7278e-01, PNorm = 71.8396, GNorm = 0.7204, lr_0 = 8.2788e-04
Loss = 1.6989e-01, PNorm = 71.9477, GNorm = 0.6129, lr_0 = 8.3163e-04
Loss = 1.5430e-01, PNorm = 72.0569, GNorm = 0.6995, lr_0 = 8.3538e-04
Loss = 1.7043e-01, PNorm = 72.1687, GNorm = 1.2146, lr_0 = 8.3913e-04
Loss = 2.1641e-01, PNorm = 72.2872, GNorm = 1.7581, lr_0 = 8.4288e-04
Loss = 1.9727e-01, PNorm = 72.4300, GNorm = 0.8388, lr_0 = 8.4663e-04
Loss = 1.9305e-01, PNorm = 72.5630, GNorm = 1.5125, lr_0 = 8.5038e-04
Loss = 1.8025e-01, PNorm = 72.6971, GNorm = 1.1299, lr_0 = 8.5413e-04
Loss = 1.7441e-01, PNorm = 72.8021, GNorm = 0.7350, lr_0 = 8.5788e-04
Loss = 1.5917e-01, PNorm = 72.9255, GNorm = 0.9015, lr_0 = 8.6163e-04
Loss = 2.0041e-01, PNorm = 73.0337, GNorm = 0.7366, lr_0 = 8.6538e-04
Loss = 1.7902e-01, PNorm = 73.1539, GNorm = 0.8907, lr_0 = 8.6913e-04
Loss = 1.7291e-01, PNorm = 73.2772, GNorm = 0.6894, lr_0 = 8.7288e-04
Loss = 1.7939e-01, PNorm = 73.4020, GNorm = 0.6000, lr_0 = 8.7663e-04
Loss = 1.5988e-01, PNorm = 73.5069, GNorm = 0.7263, lr_0 = 8.8038e-04
Loss = 1.6822e-01, PNorm = 73.6161, GNorm = 0.7505, lr_0 = 8.8413e-04
Loss = 1.7810e-01, PNorm = 73.7214, GNorm = 0.7264, lr_0 = 8.8788e-04
Loss = 1.7300e-01, PNorm = 73.8362, GNorm = 0.7985, lr_0 = 8.9163e-04
Loss = 1.6732e-01, PNorm = 73.9445, GNorm = 0.7324, lr_0 = 8.9538e-04
Loss = 1.8185e-01, PNorm = 74.0603, GNorm = 0.9283, lr_0 = 8.9913e-04
Loss = 1.6677e-01, PNorm = 74.1865, GNorm = 0.8524, lr_0 = 9.0288e-04
Loss = 1.8711e-01, PNorm = 74.3128, GNorm = 0.6639, lr_0 = 9.0663e-04
Loss = 1.7347e-01, PNorm = 74.4450, GNorm = 1.0784, lr_0 = 9.1038e-04
Loss = 1.9003e-01, PNorm = 74.5620, GNorm = 0.7895, lr_0 = 9.1413e-04
Loss = 1.7040e-01, PNorm = 74.6884, GNorm = 1.2637, lr_0 = 9.1788e-04
Loss = 1.7165e-01, PNorm = 74.7918, GNorm = 1.0573, lr_0 = 9.2163e-04
Loss = 1.7138e-01, PNorm = 74.9159, GNorm = 0.8170, lr_0 = 9.2538e-04
Loss = 1.6015e-01, PNorm = 75.0282, GNorm = 0.9142, lr_0 = 9.2913e-04
Loss = 1.7769e-01, PNorm = 75.1541, GNorm = 0.6395, lr_0 = 9.3288e-04
Loss = 1.7209e-01, PNorm = 75.2733, GNorm = 0.6763, lr_0 = 9.3663e-04
Loss = 1.8073e-01, PNorm = 75.3861, GNorm = 1.1240, lr_0 = 9.4038e-04
Loss = 1.8473e-01, PNorm = 75.5045, GNorm = 1.5097, lr_0 = 9.4413e-04
Loss = 1.8522e-01, PNorm = 75.6422, GNorm = 1.0252, lr_0 = 9.4788e-04
Loss = 1.8224e-01, PNorm = 75.7786, GNorm = 0.8036, lr_0 = 9.5163e-04
Loss = 2.1418e-01, PNorm = 75.9218, GNorm = 1.2523, lr_0 = 9.5538e-04
Loss = 2.0944e-01, PNorm = 76.0814, GNorm = 0.8592, lr_0 = 9.5913e-04
Loss = 1.8185e-01, PNorm = 76.2196, GNorm = 1.1463, lr_0 = 9.6288e-04
Loss = 1.6107e-01, PNorm = 76.3586, GNorm = 0.6203, lr_0 = 9.6663e-04
Loss = 1.7580e-01, PNorm = 76.4769, GNorm = 0.6922, lr_0 = 9.7038e-04
Loss = 1.8095e-01, PNorm = 76.6152, GNorm = 0.7825, lr_0 = 9.7413e-04
Loss = 1.7307e-01, PNorm = 76.7366, GNorm = 0.9999, lr_0 = 9.7788e-04
Loss = 1.6894e-01, PNorm = 76.8752, GNorm = 0.5779, lr_0 = 9.8163e-04
Loss = 1.8274e-01, PNorm = 76.9907, GNorm = 0.7319, lr_0 = 9.8537e-04
Loss = 1.9477e-01, PNorm = 77.1291, GNorm = 0.9565, lr_0 = 9.8912e-04
Loss = 1.7645e-01, PNorm = 77.2424, GNorm = 1.2026, lr_0 = 9.9288e-04
Loss = 2.0359e-01, PNorm = 77.3914, GNorm = 1.1160, lr_0 = 9.9663e-04
Loss = 1.7838e-01, PNorm = 77.5082, GNorm = 0.9810, lr_0 = 9.9993e-04
Validation mae = 0.548454
Epoch 2
Loss = 1.2339e-01, PNorm = 77.6337, GNorm = 0.6676, lr_0 = 9.9925e-04
Loss = 1.1660e-01, PNorm = 77.7387, GNorm = 0.4309, lr_0 = 9.9856e-04
Loss = 1.0361e-01, PNorm = 77.8319, GNorm = 0.5011, lr_0 = 9.9788e-04
Loss = 1.0617e-01, PNorm = 77.9363, GNorm = 0.6646, lr_0 = 9.9719e-04
Loss = 1.1584e-01, PNorm = 78.0210, GNorm = 1.2332, lr_0 = 9.9651e-04
Loss = 1.1320e-01, PNorm = 78.1295, GNorm = 0.6966, lr_0 = 9.9583e-04
Loss = 1.1389e-01, PNorm = 78.2361, GNorm = 0.6376, lr_0 = 9.9515e-04
Loss = 8.7246e-02, PNorm = 78.3314, GNorm = 0.4396, lr_0 = 9.9446e-04
Loss = 9.7322e-02, PNorm = 78.4257, GNorm = 0.5715, lr_0 = 9.9378e-04
Loss = 1.0473e-01, PNorm = 78.5190, GNorm = 0.5230, lr_0 = 9.9310e-04
Loss = 1.0827e-01, PNorm = 78.6195, GNorm = 0.5702, lr_0 = 9.9242e-04
Loss = 1.0384e-01, PNorm = 78.7155, GNorm = 0.6830, lr_0 = 9.9174e-04
Loss = 1.0869e-01, PNorm = 78.8336, GNorm = 0.6581, lr_0 = 9.9106e-04
Loss = 9.1611e-02, PNorm = 78.9382, GNorm = 0.5803, lr_0 = 9.9038e-04
Loss = 1.0181e-01, PNorm = 79.0434, GNorm = 0.8319, lr_0 = 9.8971e-04
Loss = 1.2277e-01, PNorm = 79.1484, GNorm = 0.5339, lr_0 = 9.8903e-04
Loss = 1.1700e-01, PNorm = 79.2566, GNorm = 0.5332, lr_0 = 9.8835e-04
Loss = 1.1828e-01, PNorm = 79.3668, GNorm = 0.7473, lr_0 = 9.8767e-04
Loss = 9.8129e-02, PNorm = 79.4766, GNorm = 0.6008, lr_0 = 9.8700e-04
Loss = 9.6261e-02, PNorm = 79.5786, GNorm = 0.4756, lr_0 = 9.8632e-04
Loss = 1.0136e-01, PNorm = 79.6663, GNorm = 0.4458, lr_0 = 9.8564e-04
Loss = 9.1208e-02, PNorm = 79.7589, GNorm = 0.3717, lr_0 = 9.8497e-04
Loss = 1.0233e-01, PNorm = 79.8532, GNorm = 0.7972, lr_0 = 9.8429e-04
Loss = 1.1212e-01, PNorm = 79.9527, GNorm = 0.8865, lr_0 = 9.8362e-04
Loss = 1.1048e-01, PNorm = 80.0533, GNorm = 0.5098, lr_0 = 9.8295e-04
Loss = 9.5032e-02, PNorm = 80.1478, GNorm = 0.6955, lr_0 = 9.8227e-04
Loss = 1.2105e-01, PNorm = 80.2455, GNorm = 0.5365, lr_0 = 9.8160e-04
Loss = 1.0816e-01, PNorm = 80.3439, GNorm = 0.5812, lr_0 = 9.8093e-04
Loss = 1.0332e-01, PNorm = 80.4539, GNorm = 0.4708, lr_0 = 9.8026e-04
Loss = 1.1092e-01, PNorm = 80.5672, GNorm = 0.7706, lr_0 = 9.7958e-04
Loss = 1.1345e-01, PNorm = 80.6808, GNorm = 0.7209, lr_0 = 9.7891e-04
Loss = 1.1188e-01, PNorm = 80.7869, GNorm = 1.0986, lr_0 = 9.7824e-04
Loss = 1.0608e-01, PNorm = 80.8890, GNorm = 0.5807, lr_0 = 9.7757e-04
Loss = 1.0477e-01, PNorm = 80.9852, GNorm = 0.6048, lr_0 = 9.7690e-04
Loss = 1.3195e-01, PNorm = 81.0990, GNorm = 1.0267, lr_0 = 9.7623e-04
Loss = 1.0956e-01, PNorm = 81.2024, GNorm = 0.5853, lr_0 = 9.7556e-04
Loss = 1.0615e-01, PNorm = 81.3052, GNorm = 0.4689, lr_0 = 9.7490e-04
Loss = 9.2088e-02, PNorm = 81.3973, GNorm = 0.4945, lr_0 = 9.7423e-04
Loss = 1.1685e-01, PNorm = 81.4961, GNorm = 0.5287, lr_0 = 9.7356e-04
Loss = 1.0913e-01, PNorm = 81.6002, GNorm = 0.6335, lr_0 = 9.7289e-04
Loss = 1.0843e-01, PNorm = 81.7022, GNorm = 0.4362, lr_0 = 9.7223e-04
Loss = 1.1132e-01, PNorm = 81.8048, GNorm = 0.7274, lr_0 = 9.7156e-04
Loss = 1.1296e-01, PNorm = 81.9162, GNorm = 0.7359, lr_0 = 9.7090e-04
Loss = 1.0450e-01, PNorm = 82.0289, GNorm = 0.6962, lr_0 = 9.7023e-04
Loss = 1.1880e-01, PNorm = 82.1371, GNorm = 0.8610, lr_0 = 9.6957e-04
Loss = 1.0986e-01, PNorm = 82.2418, GNorm = 0.5254, lr_0 = 9.6890e-04
Loss = 1.0245e-01, PNorm = 82.3499, GNorm = 0.5246, lr_0 = 9.6824e-04
Loss = 1.2443e-01, PNorm = 82.4554, GNorm = 0.6900, lr_0 = 9.6757e-04
Loss = 1.0893e-01, PNorm = 82.5536, GNorm = 0.8008, lr_0 = 9.6691e-04
Loss = 1.0497e-01, PNorm = 82.6531, GNorm = 0.7814, lr_0 = 9.6625e-04
Loss = 1.1410e-01, PNorm = 82.7629, GNorm = 0.5347, lr_0 = 9.6559e-04
Loss = 1.2098e-01, PNorm = 82.8699, GNorm = 0.7871, lr_0 = 9.6493e-04
Loss = 1.0246e-01, PNorm = 82.9756, GNorm = 0.6473, lr_0 = 9.6427e-04
Loss = 1.0231e-01, PNorm = 83.0778, GNorm = 0.4799, lr_0 = 9.6360e-04
Loss = 1.0214e-01, PNorm = 83.1736, GNorm = 0.5600, lr_0 = 9.6294e-04
Loss = 1.1321e-01, PNorm = 83.2689, GNorm = 0.5900, lr_0 = 9.6228e-04
Loss = 1.1773e-01, PNorm = 83.3680, GNorm = 1.2141, lr_0 = 9.6163e-04
Loss = 1.1758e-01, PNorm = 83.4861, GNorm = 0.5983, lr_0 = 9.6097e-04
Loss = 1.0518e-01, PNorm = 83.5888, GNorm = 0.9863, lr_0 = 9.6031e-04
Loss = 1.1362e-01, PNorm = 83.6875, GNorm = 0.5717, lr_0 = 9.5965e-04
Loss = 8.8868e-02, PNorm = 83.7853, GNorm = 0.4902, lr_0 = 9.5899e-04
Loss = 1.1073e-01, PNorm = 83.8785, GNorm = 0.6898, lr_0 = 9.5834e-04
Loss = 1.0045e-01, PNorm = 83.9724, GNorm = 0.7087, lr_0 = 9.5768e-04
Loss = 1.0760e-01, PNorm = 84.0731, GNorm = 0.6133, lr_0 = 9.5702e-04
Loss = 1.0664e-01, PNorm = 84.1607, GNorm = 0.4348, lr_0 = 9.5637e-04
Loss = 1.0794e-01, PNorm = 84.2539, GNorm = 0.9353, lr_0 = 9.5571e-04
Loss = 1.1122e-01, PNorm = 84.3529, GNorm = 0.4764, lr_0 = 9.5506e-04
Loss = 1.0875e-01, PNorm = 84.4544, GNorm = 0.8540, lr_0 = 9.5440e-04
Loss = 1.3263e-01, PNorm = 84.5628, GNorm = 0.6341, lr_0 = 9.5375e-04
Loss = 1.2861e-01, PNorm = 84.6789, GNorm = 0.5038, lr_0 = 9.5310e-04
Loss = 1.0718e-01, PNorm = 84.7960, GNorm = 0.4812, lr_0 = 9.5244e-04
Loss = 1.1445e-01, PNorm = 84.9032, GNorm = 0.5404, lr_0 = 9.5179e-04
Loss = 1.0677e-01, PNorm = 85.0196, GNorm = 0.4691, lr_0 = 9.5114e-04
Loss = 1.1311e-01, PNorm = 85.1298, GNorm = 0.8739, lr_0 = 9.5049e-04
Loss = 1.0951e-01, PNorm = 85.2511, GNorm = 0.5570, lr_0 = 9.4984e-04
Loss = 1.1380e-01, PNorm = 85.3558, GNorm = 0.3918, lr_0 = 9.4919e-04
Loss = 1.0416e-01, PNorm = 85.4769, GNorm = 0.6023, lr_0 = 9.4854e-04
Loss = 1.2729e-01, PNorm = 85.5855, GNorm = 0.4823, lr_0 = 9.4789e-04
Loss = 1.1702e-01, PNorm = 85.7099, GNorm = 1.1747, lr_0 = 9.4724e-04
Loss = 1.1190e-01, PNorm = 85.8346, GNorm = 0.7858, lr_0 = 9.4659e-04
Loss = 1.2054e-01, PNorm = 85.9538, GNorm = 0.8936, lr_0 = 9.4594e-04
Loss = 1.1902e-01, PNorm = 86.0596, GNorm = 0.6050, lr_0 = 9.4529e-04
Loss = 1.1245e-01, PNorm = 86.1777, GNorm = 0.6679, lr_0 = 9.4464e-04
Loss = 1.0192e-01, PNorm = 86.2812, GNorm = 0.5823, lr_0 = 9.4400e-04
Loss = 1.0611e-01, PNorm = 86.3768, GNorm = 0.7251, lr_0 = 9.4335e-04
Loss = 1.2329e-01, PNorm = 86.4894, GNorm = 0.8513, lr_0 = 9.4270e-04
Loss = 1.1591e-01, PNorm = 86.5926, GNorm = 0.6136, lr_0 = 9.4206e-04
Loss = 1.0970e-01, PNorm = 86.7031, GNorm = 0.7349, lr_0 = 9.4141e-04
Loss = 1.1974e-01, PNorm = 86.8178, GNorm = 1.4219, lr_0 = 9.4077e-04
Loss = 1.0410e-01, PNorm = 86.9311, GNorm = 0.5853, lr_0 = 9.4012e-04
Loss = 1.3797e-01, PNorm = 87.0420, GNorm = 0.9077, lr_0 = 9.3948e-04
Loss = 1.3605e-01, PNorm = 87.1672, GNorm = 0.7012, lr_0 = 9.3884e-04
Loss = 1.2303e-01, PNorm = 87.2747, GNorm = 0.9056, lr_0 = 9.3819e-04
Loss = 1.1733e-01, PNorm = 87.4000, GNorm = 0.5344, lr_0 = 9.3755e-04
Loss = 1.1805e-01, PNorm = 87.5245, GNorm = 0.7767, lr_0 = 9.3691e-04
Loss = 1.1035e-01, PNorm = 87.6354, GNorm = 0.6968, lr_0 = 9.3627e-04
Loss = 1.0250e-01, PNorm = 87.7377, GNorm = 0.5608, lr_0 = 9.3562e-04
Loss = 1.1993e-01, PNorm = 87.8397, GNorm = 0.6550, lr_0 = 9.3498e-04
Loss = 1.2289e-01, PNorm = 87.9526, GNorm = 0.8106, lr_0 = 9.3434e-04
Loss = 1.1654e-01, PNorm = 88.0637, GNorm = 0.9973, lr_0 = 9.3370e-04
Loss = 1.1291e-01, PNorm = 88.1780, GNorm = 0.6083, lr_0 = 9.3306e-04
Loss = 1.0912e-01, PNorm = 88.2845, GNorm = 0.6522, lr_0 = 9.3242e-04
Loss = 1.1926e-01, PNorm = 88.3926, GNorm = 1.0026, lr_0 = 9.3178e-04
Loss = 1.2492e-01, PNorm = 88.5075, GNorm = 0.5772, lr_0 = 9.3115e-04
Loss = 1.1392e-01, PNorm = 88.6203, GNorm = 0.8594, lr_0 = 9.3051e-04
Loss = 1.3413e-01, PNorm = 88.7306, GNorm = 0.5038, lr_0 = 9.2987e-04
Loss = 1.1819e-01, PNorm = 88.8504, GNorm = 0.6181, lr_0 = 9.2923e-04
Loss = 1.1549e-01, PNorm = 88.9618, GNorm = 0.5182, lr_0 = 9.2860e-04
Loss = 1.2072e-01, PNorm = 89.0658, GNorm = 0.8628, lr_0 = 9.2796e-04
Loss = 1.2275e-01, PNorm = 89.1828, GNorm = 0.4187, lr_0 = 9.2733e-04
Loss = 1.4053e-01, PNorm = 89.2726, GNorm = 1.0901, lr_0 = 9.2669e-04
Loss = 1.2045e-01, PNorm = 89.3925, GNorm = 0.6126, lr_0 = 9.2606e-04
Loss = 1.2685e-01, PNorm = 89.5184, GNorm = 0.4417, lr_0 = 9.2542e-04
Loss = 1.1260e-01, PNorm = 89.6502, GNorm = 1.1926, lr_0 = 9.2479e-04
Loss = 1.2820e-01, PNorm = 89.7728, GNorm = 0.4069, lr_0 = 9.2415e-04
Loss = 1.1812e-01, PNorm = 89.8857, GNorm = 0.4634, lr_0 = 9.2352e-04
Loss = 1.0762e-01, PNorm = 89.9869, GNorm = 0.5803, lr_0 = 9.2289e-04
Loss = 1.1295e-01, PNorm = 90.0911, GNorm = 0.5597, lr_0 = 9.2226e-04
Loss = 1.2252e-01, PNorm = 90.1868, GNorm = 0.5444, lr_0 = 9.2162e-04
Loss = 1.1462e-01, PNorm = 90.3029, GNorm = 0.3637, lr_0 = 9.2099e-04
Validation mae = 0.512674
Epoch 3
Loss = 6.9211e-02, PNorm = 90.4053, GNorm = 0.4322, lr_0 = 9.2036e-04
Loss = 6.9503e-02, PNorm = 90.4828, GNorm = 1.0571, lr_0 = 9.1973e-04
Loss = 7.8919e-02, PNorm = 90.5613, GNorm = 0.8153, lr_0 = 9.1910e-04
Loss = 7.3397e-02, PNorm = 90.6340, GNorm = 0.5413, lr_0 = 9.1847e-04
Loss = 6.8690e-02, PNorm = 90.7053, GNorm = 0.9664, lr_0 = 9.1784e-04
Loss = 7.0771e-02, PNorm = 90.7668, GNorm = 0.5212, lr_0 = 9.1721e-04
Loss = 6.1444e-02, PNorm = 90.8329, GNorm = 0.3878, lr_0 = 9.1658e-04
Loss = 6.8558e-02, PNorm = 90.8926, GNorm = 0.2425, lr_0 = 9.1596e-04
Loss = 6.9231e-02, PNorm = 90.9584, GNorm = 0.3165, lr_0 = 9.1533e-04
Loss = 6.7634e-02, PNorm = 91.0114, GNorm = 0.8024, lr_0 = 9.1470e-04
Loss = 6.1964e-02, PNorm = 91.0805, GNorm = 0.8169, lr_0 = 9.1408e-04
Loss = 6.5174e-02, PNorm = 91.1430, GNorm = 0.3564, lr_0 = 9.1345e-04
Loss = 6.2335e-02, PNorm = 91.2038, GNorm = 0.3491, lr_0 = 9.1282e-04
Loss = 6.3818e-02, PNorm = 91.2613, GNorm = 0.5806, lr_0 = 9.1220e-04
Loss = 7.5179e-02, PNorm = 91.3302, GNorm = 1.2291, lr_0 = 9.1157e-04
Loss = 6.7247e-02, PNorm = 91.3972, GNorm = 0.7017, lr_0 = 9.1095e-04
Loss = 5.8695e-02, PNorm = 91.4622, GNorm = 0.4976, lr_0 = 9.1032e-04
Loss = 6.2235e-02, PNorm = 91.5265, GNorm = 0.3095, lr_0 = 9.0970e-04
Loss = 6.3736e-02, PNorm = 91.5921, GNorm = 0.5790, lr_0 = 9.0908e-04
Loss = 6.0408e-02, PNorm = 91.6622, GNorm = 0.6456, lr_0 = 9.0846e-04
Loss = 6.1479e-02, PNorm = 91.7279, GNorm = 0.3564, lr_0 = 9.0783e-04
Loss = 6.6930e-02, PNorm = 91.7907, GNorm = 0.4795, lr_0 = 9.0721e-04
Loss = 6.7563e-02, PNorm = 91.8616, GNorm = 0.5219, lr_0 = 9.0659e-04
Loss = 7.5421e-02, PNorm = 91.9316, GNorm = 0.2237, lr_0 = 9.0597e-04
Loss = 6.4893e-02, PNorm = 92.0040, GNorm = 0.7420, lr_0 = 9.0535e-04
Loss = 5.9516e-02, PNorm = 92.0577, GNorm = 0.3825, lr_0 = 9.0473e-04
Loss = 7.0468e-02, PNorm = 92.1264, GNorm = 0.4343, lr_0 = 9.0411e-04
Loss = 5.9931e-02, PNorm = 92.2003, GNorm = 0.7695, lr_0 = 9.0349e-04
Loss = 6.4041e-02, PNorm = 92.2616, GNorm = 0.4971, lr_0 = 9.0287e-04
Loss = 6.5759e-02, PNorm = 92.3342, GNorm = 0.3482, lr_0 = 9.0225e-04
Loss = 5.3572e-02, PNorm = 92.4003, GNorm = 0.4493, lr_0 = 9.0163e-04
Loss = 6.0940e-02, PNorm = 92.4634, GNorm = 0.3663, lr_0 = 9.0102e-04
Loss = 7.3616e-02, PNorm = 92.5318, GNorm = 1.3169, lr_0 = 9.0040e-04
Loss = 6.6712e-02, PNorm = 92.6037, GNorm = 0.3715, lr_0 = 8.9978e-04
Loss = 7.3508e-02, PNorm = 92.6831, GNorm = 0.7228, lr_0 = 8.9916e-04
Loss = 6.4320e-02, PNorm = 92.7552, GNorm = 0.4164, lr_0 = 8.9855e-04
Loss = 5.5739e-02, PNorm = 92.8245, GNorm = 0.6047, lr_0 = 8.9793e-04
Loss = 5.9828e-02, PNorm = 92.8835, GNorm = 0.2971, lr_0 = 8.9732e-04
Loss = 5.7800e-02, PNorm = 92.9500, GNorm = 0.3754, lr_0 = 8.9670e-04
Loss = 7.1671e-02, PNorm = 93.0235, GNorm = 1.0456, lr_0 = 8.9609e-04
Loss = 6.2738e-02, PNorm = 93.0985, GNorm = 0.4749, lr_0 = 8.9548e-04
Loss = 6.4355e-02, PNorm = 93.1710, GNorm = 0.7338, lr_0 = 8.9486e-04
Loss = 6.7222e-02, PNorm = 93.2292, GNorm = 0.4185, lr_0 = 8.9425e-04
Loss = 6.9383e-02, PNorm = 93.3043, GNorm = 0.6163, lr_0 = 8.9364e-04
Loss = 6.0098e-02, PNorm = 93.3669, GNorm = 0.6503, lr_0 = 8.9302e-04
Loss = 6.8784e-02, PNorm = 93.4383, GNorm = 0.4967, lr_0 = 8.9241e-04
Loss = 8.0260e-02, PNorm = 93.5149, GNorm = 0.6606, lr_0 = 8.9180e-04
Loss = 6.5640e-02, PNorm = 93.5919, GNorm = 0.3715, lr_0 = 8.9119e-04
Loss = 7.8822e-02, PNorm = 93.6751, GNorm = 0.3200, lr_0 = 8.9058e-04
Loss = 6.4175e-02, PNorm = 93.7563, GNorm = 0.8803, lr_0 = 8.8997e-04
Loss = 6.8226e-02, PNorm = 93.8411, GNorm = 0.6114, lr_0 = 8.8936e-04
Loss = 8.4088e-02, PNorm = 93.9230, GNorm = 0.4832, lr_0 = 8.8875e-04
Loss = 6.2832e-02, PNorm = 94.0167, GNorm = 0.7045, lr_0 = 8.8814e-04
Loss = 6.8008e-02, PNorm = 94.0834, GNorm = 0.3864, lr_0 = 8.8753e-04
Loss = 6.9539e-02, PNorm = 94.1553, GNorm = 1.0618, lr_0 = 8.8693e-04
Loss = 7.3406e-02, PNorm = 94.2362, GNorm = 0.4107, lr_0 = 8.8632e-04
Loss = 8.4921e-02, PNorm = 94.3256, GNorm = 0.3781, lr_0 = 8.8571e-04
Loss = 7.0009e-02, PNorm = 94.4190, GNorm = 0.4580, lr_0 = 8.8510e-04
Loss = 6.8647e-02, PNorm = 94.4974, GNorm = 0.4595, lr_0 = 8.8450e-04
Loss = 7.3586e-02, PNorm = 94.5862, GNorm = 0.5170, lr_0 = 8.8389e-04
Loss = 7.0661e-02, PNorm = 94.6671, GNorm = 0.9201, lr_0 = 8.8329e-04
Loss = 6.9832e-02, PNorm = 94.7482, GNorm = 0.7062, lr_0 = 8.8268e-04
Loss = 7.5162e-02, PNorm = 94.8350, GNorm = 0.9207, lr_0 = 8.8208e-04
Loss = 8.0348e-02, PNorm = 94.9183, GNorm = 0.5144, lr_0 = 8.8147e-04
Loss = 7.0504e-02, PNorm = 95.0059, GNorm = 0.6897, lr_0 = 8.8087e-04
Loss = 8.4887e-02, PNorm = 95.0980, GNorm = 0.8816, lr_0 = 8.8026e-04
Loss = 7.1826e-02, PNorm = 95.1777, GNorm = 0.8300, lr_0 = 8.7966e-04
Loss = 7.7095e-02, PNorm = 95.2697, GNorm = 0.4453, lr_0 = 8.7906e-04
Loss = 8.4270e-02, PNorm = 95.3543, GNorm = 0.5824, lr_0 = 8.7846e-04
Loss = 8.2548e-02, PNorm = 95.4537, GNorm = 0.4971, lr_0 = 8.7785e-04
Loss = 6.0773e-02, PNorm = 95.5384, GNorm = 0.3895, lr_0 = 8.7725e-04
Loss = 6.6146e-02, PNorm = 95.6191, GNorm = 0.6308, lr_0 = 8.7665e-04
Loss = 7.2175e-02, PNorm = 95.7036, GNorm = 0.5436, lr_0 = 8.7605e-04
Loss = 6.8489e-02, PNorm = 95.7797, GNorm = 0.3416, lr_0 = 8.7545e-04
Loss = 7.1245e-02, PNorm = 95.8688, GNorm = 0.4995, lr_0 = 8.7485e-04
Loss = 7.7143e-02, PNorm = 95.9616, GNorm = 0.8836, lr_0 = 8.7425e-04
Loss = 7.5029e-02, PNorm = 96.0542, GNorm = 0.5881, lr_0 = 8.7365e-04
Loss = 7.1703e-02, PNorm = 96.1405, GNorm = 0.4683, lr_0 = 8.7306e-04
Loss = 7.0416e-02, PNorm = 96.2180, GNorm = 0.7993, lr_0 = 8.7246e-04
Loss = 8.3262e-02, PNorm = 96.2896, GNorm = 1.1423, lr_0 = 8.7186e-04
Loss = 7.4694e-02, PNorm = 96.3812, GNorm = 0.6703, lr_0 = 8.7126e-04
Loss = 7.6555e-02, PNorm = 96.4723, GNorm = 0.6974, lr_0 = 8.7067e-04
Loss = 7.4175e-02, PNorm = 96.5613, GNorm = 0.8409, lr_0 = 8.7007e-04
Loss = 7.5127e-02, PNorm = 96.6436, GNorm = 0.6196, lr_0 = 8.6947e-04
Loss = 7.7927e-02, PNorm = 96.7241, GNorm = 0.4864, lr_0 = 8.6888e-04
Loss = 7.7214e-02, PNorm = 96.8158, GNorm = 1.3739, lr_0 = 8.6828e-04
Loss = 7.6199e-02, PNorm = 96.9002, GNorm = 0.5336, lr_0 = 8.6769e-04
Loss = 6.7302e-02, PNorm = 96.9914, GNorm = 0.7026, lr_0 = 8.6709e-04
Loss = 7.2453e-02, PNorm = 97.0733, GNorm = 0.3757, lr_0 = 8.6650e-04
Loss = 7.6919e-02, PNorm = 97.1541, GNorm = 0.6290, lr_0 = 8.6590e-04
Loss = 7.8919e-02, PNorm = 97.2400, GNorm = 0.6495, lr_0 = 8.6531e-04
Loss = 7.9602e-02, PNorm = 97.3311, GNorm = 0.8665, lr_0 = 8.6472e-04
Loss = 7.8950e-02, PNorm = 97.4134, GNorm = 0.7203, lr_0 = 8.6413e-04
Loss = 7.6683e-02, PNorm = 97.5098, GNorm = 0.5054, lr_0 = 8.6353e-04
Loss = 6.2507e-02, PNorm = 97.5893, GNorm = 0.3917, lr_0 = 8.6294e-04
Loss = 6.5200e-02, PNorm = 97.6698, GNorm = 0.3929, lr_0 = 8.6235e-04
Loss = 7.3282e-02, PNorm = 97.7296, GNorm = 0.4391, lr_0 = 8.6176e-04
Loss = 8.9057e-02, PNorm = 97.8230, GNorm = 0.5592, lr_0 = 8.6117e-04
Loss = 7.7655e-02, PNorm = 97.9242, GNorm = 0.7749, lr_0 = 8.6058e-04
Loss = 7.3214e-02, PNorm = 98.0108, GNorm = 0.6173, lr_0 = 8.5999e-04
Loss = 9.7033e-02, PNorm = 98.1107, GNorm = 0.4457, lr_0 = 8.5940e-04
Loss = 6.7483e-02, PNorm = 98.2093, GNorm = 0.3083, lr_0 = 8.5881e-04
Loss = 8.3840e-02, PNorm = 98.3074, GNorm = 0.4778, lr_0 = 8.5823e-04
Loss = 6.7346e-02, PNorm = 98.3958, GNorm = 0.4113, lr_0 = 8.5764e-04
Loss = 7.0072e-02, PNorm = 98.4871, GNorm = 0.3984, lr_0 = 8.5705e-04
Loss = 8.5578e-02, PNorm = 98.5801, GNorm = 0.8508, lr_0 = 8.5646e-04
Loss = 9.0805e-02, PNorm = 98.6715, GNorm = 0.6812, lr_0 = 8.5588e-04
Loss = 7.2942e-02, PNorm = 98.7695, GNorm = 0.5607, lr_0 = 8.5529e-04
Loss = 8.0147e-02, PNorm = 98.8597, GNorm = 0.3086, lr_0 = 8.5470e-04
Loss = 7.2952e-02, PNorm = 98.9562, GNorm = 0.2947, lr_0 = 8.5412e-04
Loss = 7.8169e-02, PNorm = 99.0409, GNorm = 0.5419, lr_0 = 8.5353e-04
Loss = 7.9145e-02, PNorm = 99.1209, GNorm = 0.3211, lr_0 = 8.5295e-04
Loss = 8.1124e-02, PNorm = 99.2148, GNorm = 0.7957, lr_0 = 8.5236e-04
Loss = 8.2975e-02, PNorm = 99.2915, GNorm = 0.8890, lr_0 = 8.5178e-04
Loss = 7.6074e-02, PNorm = 99.3872, GNorm = 0.4884, lr_0 = 8.5120e-04
Loss = 7.6550e-02, PNorm = 99.4775, GNorm = 0.8482, lr_0 = 8.5061e-04
Loss = 8.6679e-02, PNorm = 99.5692, GNorm = 0.3718, lr_0 = 8.5003e-04
Loss = 7.8728e-02, PNorm = 99.6676, GNorm = 0.4273, lr_0 = 8.4945e-04
Loss = 6.9611e-02, PNorm = 99.7669, GNorm = 0.4049, lr_0 = 8.4887e-04
Loss = 7.3516e-02, PNorm = 99.8608, GNorm = 0.4026, lr_0 = 8.4828e-04
Validation mae = 0.502686
Epoch 4
Loss = 5.8478e-02, PNorm = 99.9430, GNorm = 0.4540, lr_0 = 8.4770e-04
Loss = 5.1560e-02, PNorm = 100.0132, GNorm = 0.3948, lr_0 = 8.4712e-04
Loss = 5.2415e-02, PNorm = 100.0806, GNorm = 0.7137, lr_0 = 8.4654e-04
Loss = 4.6439e-02, PNorm = 100.1299, GNorm = 0.2853, lr_0 = 8.4596e-04
Loss = 4.1940e-02, PNorm = 100.1859, GNorm = 0.3284, lr_0 = 8.4538e-04
Loss = 4.5754e-02, PNorm = 100.2374, GNorm = 0.3421, lr_0 = 8.4480e-04
Loss = 4.4306e-02, PNorm = 100.2992, GNorm = 0.2541, lr_0 = 8.4423e-04
Loss = 4.4175e-02, PNorm = 100.3574, GNorm = 0.3959, lr_0 = 8.4365e-04
Loss = 5.2188e-02, PNorm = 100.4063, GNorm = 0.4934, lr_0 = 8.4307e-04
Loss = 4.3919e-02, PNorm = 100.4547, GNorm = 0.3873, lr_0 = 8.4249e-04
Loss = 4.1232e-02, PNorm = 100.5077, GNorm = 0.5430, lr_0 = 8.4191e-04
Loss = 4.1035e-02, PNorm = 100.5575, GNorm = 0.4324, lr_0 = 8.4134e-04
Loss = 4.2857e-02, PNorm = 100.6094, GNorm = 0.5634, lr_0 = 8.4076e-04
Loss = 4.2603e-02, PNorm = 100.6619, GNorm = 0.7480, lr_0 = 8.4019e-04
Loss = 4.3480e-02, PNorm = 100.7146, GNorm = 0.3244, lr_0 = 8.3961e-04
Loss = 4.2595e-02, PNorm = 100.7561, GNorm = 0.2647, lr_0 = 8.3903e-04
Loss = 5.2634e-02, PNorm = 100.8179, GNorm = 0.8019, lr_0 = 8.3846e-04
Loss = 4.8556e-02, PNorm = 100.8794, GNorm = 0.7700, lr_0 = 8.3789e-04
Loss = 5.0989e-02, PNorm = 100.9411, GNorm = 0.6188, lr_0 = 8.3731e-04
Loss = 4.9245e-02, PNorm = 100.9926, GNorm = 0.3957, lr_0 = 8.3674e-04
Loss = 5.3420e-02, PNorm = 101.0451, GNorm = 0.8122, lr_0 = 8.3616e-04
Loss = 5.0737e-02, PNorm = 101.1120, GNorm = 0.3263, lr_0 = 8.3559e-04
Loss = 4.3864e-02, PNorm = 101.1742, GNorm = 0.3129, lr_0 = 8.3502e-04
Loss = 4.4954e-02, PNorm = 101.2378, GNorm = 0.5224, lr_0 = 8.3445e-04
Loss = 5.0596e-02, PNorm = 101.3004, GNorm = 1.0329, lr_0 = 8.3388e-04
Loss = 4.5525e-02, PNorm = 101.3681, GNorm = 0.3643, lr_0 = 8.3330e-04
Loss = 4.5767e-02, PNorm = 101.4268, GNorm = 0.4766, lr_0 = 8.3273e-04
Loss = 4.9182e-02, PNorm = 101.4875, GNorm = 0.3277, lr_0 = 8.3216e-04
Loss = 5.2700e-02, PNorm = 101.5519, GNorm = 0.3541, lr_0 = 8.3159e-04
Loss = 5.4949e-02, PNorm = 101.6204, GNorm = 0.2446, lr_0 = 8.3102e-04
Loss = 4.6055e-02, PNorm = 101.6921, GNorm = 0.2943, lr_0 = 8.3045e-04
Loss = 4.1958e-02, PNorm = 101.7487, GNorm = 0.5278, lr_0 = 8.2988e-04
Loss = 4.1505e-02, PNorm = 101.8113, GNorm = 0.4454, lr_0 = 8.2932e-04
Loss = 4.7260e-02, PNorm = 101.8572, GNorm = 0.6665, lr_0 = 8.2875e-04
Loss = 4.9411e-02, PNorm = 101.9216, GNorm = 0.3319, lr_0 = 8.2818e-04
Loss = 4.5068e-02, PNorm = 101.9836, GNorm = 0.3347, lr_0 = 8.2761e-04
Loss = 4.5651e-02, PNorm = 102.0457, GNorm = 0.3738, lr_0 = 8.2705e-04
Loss = 4.1888e-02, PNorm = 102.1053, GNorm = 0.4947, lr_0 = 8.2648e-04
Loss = 4.8915e-02, PNorm = 102.1647, GNorm = 0.4121, lr_0 = 8.2591e-04
Loss = 4.9142e-02, PNorm = 102.2244, GNorm = 0.4477, lr_0 = 8.2535e-04
Loss = 6.0000e-02, PNorm = 102.2934, GNorm = 2.0013, lr_0 = 8.2478e-04
Loss = 5.0078e-02, PNorm = 102.3584, GNorm = 0.7950, lr_0 = 8.2422e-04
Loss = 4.7578e-02, PNorm = 102.4341, GNorm = 0.2797, lr_0 = 8.2365e-04
Loss = 4.4983e-02, PNorm = 102.4963, GNorm = 0.2806, lr_0 = 8.2309e-04
Loss = 4.7155e-02, PNorm = 102.5601, GNorm = 0.7532, lr_0 = 8.2252e-04
Loss = 4.4030e-02, PNorm = 102.6200, GNorm = 0.5339, lr_0 = 8.2196e-04
Loss = 5.0192e-02, PNorm = 102.6874, GNorm = 0.3362, lr_0 = 8.2140e-04
Loss = 4.2554e-02, PNorm = 102.7513, GNorm = 0.7095, lr_0 = 8.2084e-04
Loss = 4.4769e-02, PNorm = 102.8254, GNorm = 0.6644, lr_0 = 8.2027e-04
Loss = 4.3508e-02, PNorm = 102.8878, GNorm = 0.3710, lr_0 = 8.1971e-04
Loss = 4.9567e-02, PNorm = 102.9507, GNorm = 0.4386, lr_0 = 8.1915e-04
Loss = 4.7582e-02, PNorm = 103.0084, GNorm = 0.2618, lr_0 = 8.1859e-04
Loss = 4.4568e-02, PNorm = 103.0750, GNorm = 0.2208, lr_0 = 8.1803e-04
Loss = 5.3612e-02, PNorm = 103.1420, GNorm = 0.9731, lr_0 = 8.1747e-04
Loss = 4.5596e-02, PNorm = 103.2164, GNorm = 0.5260, lr_0 = 8.1691e-04
Loss = 4.8256e-02, PNorm = 103.2804, GNorm = 0.9709, lr_0 = 8.1635e-04
Loss = 4.4943e-02, PNorm = 103.3447, GNorm = 0.5116, lr_0 = 8.1579e-04
Loss = 4.0472e-02, PNorm = 103.4130, GNorm = 0.6063, lr_0 = 8.1523e-04
Loss = 3.8612e-02, PNorm = 103.4799, GNorm = 0.4562, lr_0 = 8.1467e-04
Loss = 4.9898e-02, PNorm = 103.5401, GNorm = 0.7347, lr_0 = 8.1411e-04
Loss = 4.8383e-02, PNorm = 103.6031, GNorm = 1.1063, lr_0 = 8.1355e-04
Loss = 4.6706e-02, PNorm = 103.6660, GNorm = 0.3372, lr_0 = 8.1300e-04
Loss = 4.6428e-02, PNorm = 103.7267, GNorm = 0.3500, lr_0 = 8.1244e-04
Loss = 4.7909e-02, PNorm = 103.7994, GNorm = 0.3911, lr_0 = 8.1188e-04
Loss = 5.3847e-02, PNorm = 103.8574, GNorm = 0.5661, lr_0 = 8.1133e-04
Loss = 4.7294e-02, PNorm = 103.9287, GNorm = 0.4581, lr_0 = 8.1077e-04
Loss = 5.3047e-02, PNorm = 103.9910, GNorm = 0.4690, lr_0 = 8.1022e-04
Loss = 5.3029e-02, PNorm = 104.0665, GNorm = 0.4507, lr_0 = 8.0966e-04
Loss = 5.2505e-02, PNorm = 104.1440, GNorm = 0.3680, lr_0 = 8.0911e-04
Loss = 5.1298e-02, PNorm = 104.2173, GNorm = 0.8235, lr_0 = 8.0855e-04
Loss = 5.9766e-02, PNorm = 104.3022, GNorm = 0.4955, lr_0 = 8.0800e-04
Loss = 4.4524e-02, PNorm = 104.3780, GNorm = 0.3130, lr_0 = 8.0745e-04
Loss = 4.8612e-02, PNorm = 104.4494, GNorm = 0.5051, lr_0 = 8.0689e-04
Loss = 5.8095e-02, PNorm = 104.5170, GNorm = 0.3373, lr_0 = 8.0634e-04
Loss = 5.7559e-02, PNorm = 104.5904, GNorm = 0.3942, lr_0 = 8.0579e-04
Loss = 4.3986e-02, PNorm = 104.6582, GNorm = 0.4670, lr_0 = 8.0523e-04
Loss = 4.3270e-02, PNorm = 104.7204, GNorm = 0.3589, lr_0 = 8.0468e-04
Loss = 5.4122e-02, PNorm = 104.7900, GNorm = 0.6661, lr_0 = 8.0413e-04
Loss = 4.9380e-02, PNorm = 104.8524, GNorm = 0.6493, lr_0 = 8.0358e-04
Loss = 5.5390e-02, PNorm = 104.9224, GNorm = 0.5399, lr_0 = 8.0303e-04
Loss = 4.1567e-02, PNorm = 104.9982, GNorm = 0.4104, lr_0 = 8.0248e-04
Loss = 4.8891e-02, PNorm = 105.0677, GNorm = 0.3890, lr_0 = 8.0193e-04
Loss = 4.9111e-02, PNorm = 105.1336, GNorm = 0.3756, lr_0 = 8.0138e-04
Loss = 4.8768e-02, PNorm = 105.1972, GNorm = 0.5416, lr_0 = 8.0083e-04
Loss = 5.4754e-02, PNorm = 105.2667, GNorm = 0.4172, lr_0 = 8.0028e-04
Loss = 5.4916e-02, PNorm = 105.3363, GNorm = 0.7121, lr_0 = 7.9974e-04
Loss = 5.2358e-02, PNorm = 105.4071, GNorm = 0.4638, lr_0 = 7.9919e-04
Loss = 5.4300e-02, PNorm = 105.4913, GNorm = 0.3243, lr_0 = 7.9864e-04
Loss = 5.0217e-02, PNorm = 105.5678, GNorm = 0.6003, lr_0 = 7.9809e-04
Loss = 4.7480e-02, PNorm = 105.6404, GNorm = 0.3300, lr_0 = 7.9755e-04
Loss = 6.0956e-02, PNorm = 105.7183, GNorm = 0.5616, lr_0 = 7.9700e-04
Loss = 4.6381e-02, PNorm = 105.7926, GNorm = 0.4192, lr_0 = 7.9645e-04
Loss = 5.7963e-02, PNorm = 105.8696, GNorm = 0.3699, lr_0 = 7.9591e-04
Loss = 4.2252e-02, PNorm = 105.9407, GNorm = 0.4892, lr_0 = 7.9536e-04
Loss = 4.5071e-02, PNorm = 106.0024, GNorm = 0.3639, lr_0 = 7.9482e-04
Loss = 4.5878e-02, PNorm = 106.0716, GNorm = 0.8029, lr_0 = 7.9427e-04
Loss = 5.3688e-02, PNorm = 106.1442, GNorm = 0.4595, lr_0 = 7.9373e-04
Loss = 5.7854e-02, PNorm = 106.2177, GNorm = 0.3804, lr_0 = 7.9319e-04
Loss = 4.6237e-02, PNorm = 106.3015, GNorm = 0.4747, lr_0 = 7.9264e-04
Loss = 5.7237e-02, PNorm = 106.3765, GNorm = 0.6404, lr_0 = 7.9210e-04
Loss = 5.9618e-02, PNorm = 106.4516, GNorm = 0.5233, lr_0 = 7.9156e-04
Loss = 6.3704e-02, PNorm = 106.5372, GNorm = 0.3974, lr_0 = 7.9101e-04
Loss = 5.4305e-02, PNorm = 106.6115, GNorm = 0.6529, lr_0 = 7.9047e-04
Loss = 5.0849e-02, PNorm = 106.6921, GNorm = 0.9060, lr_0 = 7.8993e-04
Loss = 5.2870e-02, PNorm = 106.7696, GNorm = 0.3996, lr_0 = 7.8939e-04
Loss = 5.2654e-02, PNorm = 106.8526, GNorm = 0.8422, lr_0 = 7.8885e-04
Loss = 5.2640e-02, PNorm = 106.9364, GNorm = 0.2775, lr_0 = 7.8831e-04
Loss = 5.7178e-02, PNorm = 107.0159, GNorm = 0.3522, lr_0 = 7.8777e-04
Loss = 5.2144e-02, PNorm = 107.0977, GNorm = 0.5810, lr_0 = 7.8723e-04
Loss = 5.1485e-02, PNorm = 107.1752, GNorm = 0.6321, lr_0 = 7.8669e-04
Loss = 5.5806e-02, PNorm = 107.2504, GNorm = 0.9332, lr_0 = 7.8615e-04
Loss = 5.3960e-02, PNorm = 107.3326, GNorm = 0.4680, lr_0 = 7.8561e-04
Loss = 5.4166e-02, PNorm = 107.4160, GNorm = 0.5543, lr_0 = 7.8507e-04
Loss = 6.5661e-02, PNorm = 107.5020, GNorm = 0.3155, lr_0 = 7.8454e-04
Loss = 5.3571e-02, PNorm = 107.5896, GNorm = 0.3726, lr_0 = 7.8400e-04
Loss = 5.7295e-02, PNorm = 107.6746, GNorm = 0.4346, lr_0 = 7.8346e-04
Loss = 4.8969e-02, PNorm = 107.7605, GNorm = 0.4494, lr_0 = 7.8293e-04
Loss = 5.6561e-02, PNorm = 107.8316, GNorm = 0.3520, lr_0 = 7.8239e-04
Loss = 5.5827e-02, PNorm = 107.9071, GNorm = 0.7505, lr_0 = 7.8185e-04
Loss = 4.7748e-02, PNorm = 107.9844, GNorm = 0.3626, lr_0 = 7.8132e-04
Validation mae = 0.498706
Epoch 5
Loss = 3.9541e-02, PNorm = 108.0468, GNorm = 0.4126, lr_0 = 7.8078e-04
Loss = 3.5072e-02, PNorm = 108.0996, GNorm = 0.2931, lr_0 = 7.8025e-04
Loss = 3.3905e-02, PNorm = 108.1450, GNorm = 0.5405, lr_0 = 7.7971e-04
Loss = 3.2139e-02, PNorm = 108.1913, GNorm = 0.3337, lr_0 = 7.7918e-04
Loss = 3.3938e-02, PNorm = 108.2395, GNorm = 0.3396, lr_0 = 7.7864e-04
Loss = 3.7025e-02, PNorm = 108.2875, GNorm = 0.2911, lr_0 = 7.7811e-04
Loss = 3.2983e-02, PNorm = 108.3404, GNorm = 0.3678, lr_0 = 7.7758e-04
Loss = 3.4927e-02, PNorm = 108.3879, GNorm = 0.6691, lr_0 = 7.7705e-04
Loss = 3.8267e-02, PNorm = 108.4355, GNorm = 0.4656, lr_0 = 7.7651e-04
Loss = 3.6112e-02, PNorm = 108.4866, GNorm = 0.3819, lr_0 = 7.7598e-04
Loss = 3.6646e-02, PNorm = 108.5415, GNorm = 0.3077, lr_0 = 7.7545e-04
Loss = 2.9702e-02, PNorm = 108.5949, GNorm = 0.1717, lr_0 = 7.7492e-04
Loss = 3.5739e-02, PNorm = 108.6461, GNorm = 0.4950, lr_0 = 7.7439e-04
Loss = 3.5271e-02, PNorm = 108.6967, GNorm = 0.5893, lr_0 = 7.7386e-04
Loss = 3.2696e-02, PNorm = 108.7428, GNorm = 0.3133, lr_0 = 7.7333e-04
Loss = 3.4568e-02, PNorm = 108.7868, GNorm = 0.2307, lr_0 = 7.7280e-04
Loss = 3.7235e-02, PNorm = 108.8391, GNorm = 0.6548, lr_0 = 7.7227e-04
Loss = 3.3273e-02, PNorm = 108.8894, GNorm = 0.5380, lr_0 = 7.7174e-04
Loss = 3.2643e-02, PNorm = 108.9410, GNorm = 0.2379, lr_0 = 7.7121e-04
Loss = 3.1506e-02, PNorm = 108.9916, GNorm = 0.4853, lr_0 = 7.7068e-04
Loss = 4.0542e-02, PNorm = 109.0418, GNorm = 0.7414, lr_0 = 7.7015e-04
Loss = 3.2025e-02, PNorm = 109.0926, GNorm = 0.5160, lr_0 = 7.6963e-04
Loss = 4.1147e-02, PNorm = 109.1509, GNorm = 0.2855, lr_0 = 7.6910e-04
Loss = 3.1763e-02, PNorm = 109.2068, GNorm = 0.3370, lr_0 = 7.6857e-04
Loss = 3.4967e-02, PNorm = 109.2608, GNorm = 0.4696, lr_0 = 7.6805e-04
Loss = 3.1380e-02, PNorm = 109.3136, GNorm = 0.2265, lr_0 = 7.6752e-04
Loss = 3.2160e-02, PNorm = 109.3636, GNorm = 0.2478, lr_0 = 7.6699e-04
Loss = 3.5824e-02, PNorm = 109.4154, GNorm = 0.3481, lr_0 = 7.6647e-04
Loss = 2.9339e-02, PNorm = 109.4715, GNorm = 0.4520, lr_0 = 7.6594e-04
Loss = 3.0703e-02, PNorm = 109.5201, GNorm = 0.8252, lr_0 = 7.6542e-04
Loss = 3.5499e-02, PNorm = 109.5655, GNorm = 0.5019, lr_0 = 7.6489e-04
Loss = 3.3231e-02, PNorm = 109.6211, GNorm = 0.2716, lr_0 = 7.6437e-04
Loss = 3.1371e-02, PNorm = 109.6741, GNorm = 0.2510, lr_0 = 7.6385e-04
Loss = 3.1144e-02, PNorm = 109.7174, GNorm = 0.3911, lr_0 = 7.6332e-04
Loss = 3.7376e-02, PNorm = 109.7691, GNorm = 0.5049, lr_0 = 7.6280e-04
Loss = 3.1373e-02, PNorm = 109.8322, GNorm = 0.4442, lr_0 = 7.6228e-04
Loss = 3.3110e-02, PNorm = 109.8863, GNorm = 0.3600, lr_0 = 7.6176e-04
Loss = 3.6423e-02, PNorm = 109.9375, GNorm = 0.6889, lr_0 = 7.6123e-04
Loss = 3.4016e-02, PNorm = 109.9857, GNorm = 0.6368, lr_0 = 7.6071e-04
Loss = 3.7852e-02, PNorm = 110.0356, GNorm = 0.5159, lr_0 = 7.6019e-04
Loss = 3.6175e-02, PNorm = 110.0923, GNorm = 0.5926, lr_0 = 7.5967e-04
Loss = 3.9092e-02, PNorm = 110.1463, GNorm = 0.4603, lr_0 = 7.5915e-04
Loss = 4.0483e-02, PNorm = 110.2080, GNorm = 0.3790, lr_0 = 7.5863e-04
Loss = 4.3139e-02, PNorm = 110.2597, GNorm = 0.4234, lr_0 = 7.5811e-04
Loss = 3.5849e-02, PNorm = 110.3141, GNorm = 0.4961, lr_0 = 7.5759e-04
Loss = 3.6195e-02, PNorm = 110.3730, GNorm = 0.4405, lr_0 = 7.5707e-04
Loss = 3.8866e-02, PNorm = 110.4359, GNorm = 0.6180, lr_0 = 7.5655e-04
Loss = 3.7087e-02, PNorm = 110.5073, GNorm = 0.3625, lr_0 = 7.5603e-04
Loss = 3.5184e-02, PNorm = 110.5647, GNorm = 0.6308, lr_0 = 7.5552e-04
Loss = 3.6721e-02, PNorm = 110.6188, GNorm = 0.3588, lr_0 = 7.5500e-04
Loss = 3.6775e-02, PNorm = 110.6757, GNorm = 0.5581, lr_0 = 7.5448e-04
Loss = 3.8454e-02, PNorm = 110.7328, GNorm = 0.5097, lr_0 = 7.5397e-04
Loss = 3.4278e-02, PNorm = 110.7930, GNorm = 0.6291, lr_0 = 7.5345e-04
Loss = 3.1775e-02, PNorm = 110.8515, GNorm = 0.4068, lr_0 = 7.5293e-04
Loss = 3.7216e-02, PNorm = 110.9103, GNorm = 0.6466, lr_0 = 7.5242e-04
Loss = 3.6760e-02, PNorm = 110.9760, GNorm = 0.3542, lr_0 = 7.5190e-04
Loss = 3.4155e-02, PNorm = 111.0434, GNorm = 0.3114, lr_0 = 7.5139e-04
Loss = 4.1502e-02, PNorm = 111.1033, GNorm = 0.3367, lr_0 = 7.5087e-04
Loss = 3.4888e-02, PNorm = 111.1746, GNorm = 0.4788, lr_0 = 7.5036e-04
Loss = 3.9773e-02, PNorm = 111.2427, GNorm = 0.8470, lr_0 = 7.4984e-04
Loss = 3.6730e-02, PNorm = 111.3096, GNorm = 0.4936, lr_0 = 7.4933e-04
Loss = 3.5636e-02, PNorm = 111.3724, GNorm = 0.7359, lr_0 = 7.4882e-04
Loss = 3.7360e-02, PNorm = 111.4329, GNorm = 0.5489, lr_0 = 7.4830e-04
Loss = 4.0314e-02, PNorm = 111.4926, GNorm = 0.5132, lr_0 = 7.4779e-04
Loss = 4.4273e-02, PNorm = 111.5606, GNorm = 0.5319, lr_0 = 7.4728e-04
Loss = 4.3433e-02, PNorm = 111.6377, GNorm = 1.3258, lr_0 = 7.4677e-04
Loss = 4.3987e-02, PNorm = 111.7228, GNorm = 0.4079, lr_0 = 7.4625e-04
Loss = 4.2039e-02, PNorm = 111.7997, GNorm = 0.4315, lr_0 = 7.4574e-04
Loss = 4.2318e-02, PNorm = 111.8723, GNorm = 0.6095, lr_0 = 7.4523e-04
Loss = 3.7379e-02, PNorm = 111.9291, GNorm = 0.4029, lr_0 = 7.4472e-04
Loss = 3.3704e-02, PNorm = 111.9863, GNorm = 0.4325, lr_0 = 7.4421e-04
Loss = 4.1375e-02, PNorm = 112.0433, GNorm = 0.3237, lr_0 = 7.4370e-04
Loss = 4.0507e-02, PNorm = 112.1023, GNorm = 0.2883, lr_0 = 7.4319e-04
Loss = 4.1503e-02, PNorm = 112.1628, GNorm = 0.8881, lr_0 = 7.4268e-04
Loss = 3.5633e-02, PNorm = 112.2190, GNorm = 0.5465, lr_0 = 7.4217e-04
Loss = 4.5400e-02, PNorm = 112.2825, GNorm = 0.4120, lr_0 = 7.4167e-04
Loss = 3.9193e-02, PNorm = 112.3445, GNorm = 0.5259, lr_0 = 7.4116e-04
Loss = 3.7504e-02, PNorm = 112.4065, GNorm = 0.6492, lr_0 = 7.4065e-04
Loss = 3.3117e-02, PNorm = 112.4641, GNorm = 0.5308, lr_0 = 7.4014e-04
Loss = 3.2544e-02, PNorm = 112.5231, GNorm = 0.7344, lr_0 = 7.3964e-04
Loss = 4.2924e-02, PNorm = 112.5886, GNorm = 0.3319, lr_0 = 7.3913e-04
Loss = 3.6915e-02, PNorm = 112.6600, GNorm = 0.2256, lr_0 = 7.3862e-04
Loss = 3.8131e-02, PNorm = 112.7290, GNorm = 0.3269, lr_0 = 7.3812e-04
Loss = 3.4899e-02, PNorm = 112.7874, GNorm = 0.3969, lr_0 = 7.3761e-04
Loss = 3.9005e-02, PNorm = 112.8558, GNorm = 0.3924, lr_0 = 7.3711e-04
Loss = 3.7063e-02, PNorm = 112.9151, GNorm = 0.4922, lr_0 = 7.3660e-04
Loss = 3.3541e-02, PNorm = 112.9756, GNorm = 0.4391, lr_0 = 7.3610e-04
Loss = 3.6182e-02, PNorm = 113.0323, GNorm = 0.4107, lr_0 = 7.3559e-04
Loss = 3.5923e-02, PNorm = 113.1001, GNorm = 0.7060, lr_0 = 7.3509e-04
Loss = 4.2770e-02, PNorm = 113.1661, GNorm = 0.3234, lr_0 = 7.3458e-04
Loss = 3.6774e-02, PNorm = 113.2270, GNorm = 0.5557, lr_0 = 7.3408e-04
Loss = 3.3430e-02, PNorm = 113.2933, GNorm = 0.6178, lr_0 = 7.3358e-04
Loss = 4.0292e-02, PNorm = 113.3575, GNorm = 0.3693, lr_0 = 7.3308e-04
Loss = 4.3478e-02, PNorm = 113.4161, GNorm = 0.7750, lr_0 = 7.3257e-04
Loss = 4.1519e-02, PNorm = 113.4816, GNorm = 0.5557, lr_0 = 7.3207e-04
Loss = 4.7894e-02, PNorm = 113.5578, GNorm = 0.2850, lr_0 = 7.3157e-04
Loss = 3.8398e-02, PNorm = 113.6377, GNorm = 0.3798, lr_0 = 7.3107e-04
Loss = 3.7819e-02, PNorm = 113.7070, GNorm = 0.2856, lr_0 = 7.3057e-04
Loss = 4.0839e-02, PNorm = 113.7718, GNorm = 0.6363, lr_0 = 7.3007e-04
Loss = 4.6259e-02, PNorm = 113.8460, GNorm = 0.3464, lr_0 = 7.2957e-04
Loss = 3.4487e-02, PNorm = 113.9123, GNorm = 0.3081, lr_0 = 7.2907e-04
Loss = 3.2498e-02, PNorm = 113.9700, GNorm = 0.2671, lr_0 = 7.2857e-04
Loss = 3.7585e-02, PNorm = 114.0424, GNorm = 0.6215, lr_0 = 7.2807e-04
Loss = 3.5090e-02, PNorm = 114.1078, GNorm = 0.3513, lr_0 = 7.2757e-04
Loss = 3.5883e-02, PNorm = 114.1754, GNorm = 0.2668, lr_0 = 7.2707e-04
Loss = 4.2559e-02, PNorm = 114.2383, GNorm = 0.3090, lr_0 = 7.2657e-04
Loss = 4.3042e-02, PNorm = 114.3029, GNorm = 0.6455, lr_0 = 7.2608e-04
Loss = 4.0241e-02, PNorm = 114.3783, GNorm = 0.5679, lr_0 = 7.2558e-04
Loss = 3.9285e-02, PNorm = 114.4394, GNorm = 0.3534, lr_0 = 7.2508e-04
Loss = 3.8835e-02, PNorm = 114.5060, GNorm = 0.5098, lr_0 = 7.2458e-04
Loss = 4.0899e-02, PNorm = 114.5681, GNorm = 0.2938, lr_0 = 7.2409e-04
Loss = 3.7208e-02, PNorm = 114.6384, GNorm = 0.7643, lr_0 = 7.2359e-04
Loss = 3.7050e-02, PNorm = 114.7095, GNorm = 0.3136, lr_0 = 7.2310e-04
Loss = 4.2438e-02, PNorm = 114.7816, GNorm = 0.8029, lr_0 = 7.2260e-04
Loss = 3.6617e-02, PNorm = 114.8426, GNorm = 0.5685, lr_0 = 7.2211e-04
Loss = 4.5643e-02, PNorm = 114.9116, GNorm = 0.3059, lr_0 = 7.2161e-04
Loss = 4.1914e-02, PNorm = 114.9824, GNorm = 0.4280, lr_0 = 7.2112e-04
Loss = 4.2980e-02, PNorm = 115.0551, GNorm = 0.5031, lr_0 = 7.2062e-04
Loss = 3.4936e-02, PNorm = 115.1264, GNorm = 0.2622, lr_0 = 7.2013e-04
Loss = 4.1287e-02, PNorm = 115.1889, GNorm = 0.4638, lr_0 = 7.1964e-04
Validation mae = 0.495276
Epoch 6
Loss = 3.1563e-02, PNorm = 115.2491, GNorm = 0.6754, lr_0 = 7.1914e-04
Loss = 3.4303e-02, PNorm = 115.2939, GNorm = 0.3948, lr_0 = 7.1865e-04
Loss = 2.8243e-02, PNorm = 115.3402, GNorm = 0.7644, lr_0 = 7.1816e-04
Loss = 3.4212e-02, PNorm = 115.3819, GNorm = 0.4146, lr_0 = 7.1767e-04
Loss = 3.1120e-02, PNorm = 115.4395, GNorm = 0.3026, lr_0 = 7.1717e-04
Loss = 3.0421e-02, PNorm = 115.4922, GNorm = 0.2572, lr_0 = 7.1668e-04
Loss = 2.7239e-02, PNorm = 115.5366, GNorm = 0.2206, lr_0 = 7.1619e-04
Loss = 2.8830e-02, PNorm = 115.5949, GNorm = 0.3464, lr_0 = 7.1570e-04
Loss = 3.1231e-02, PNorm = 115.6436, GNorm = 0.5513, lr_0 = 7.1521e-04
Loss = 2.8224e-02, PNorm = 115.6960, GNorm = 0.4547, lr_0 = 7.1472e-04
Loss = 2.7492e-02, PNorm = 115.7404, GNorm = 0.2388, lr_0 = 7.1423e-04
Loss = 2.8218e-02, PNorm = 115.7855, GNorm = 0.4243, lr_0 = 7.1374e-04
Loss = 2.7335e-02, PNorm = 115.8299, GNorm = 0.2115, lr_0 = 7.1325e-04
Loss = 2.9128e-02, PNorm = 115.8755, GNorm = 0.2319, lr_0 = 7.1277e-04
Loss = 2.8520e-02, PNorm = 115.9184, GNorm = 0.5070, lr_0 = 7.1228e-04
Loss = 2.8345e-02, PNorm = 115.9684, GNorm = 0.2124, lr_0 = 7.1179e-04
Loss = 2.8630e-02, PNorm = 116.0220, GNorm = 0.4215, lr_0 = 7.1130e-04
Loss = 3.1567e-02, PNorm = 116.0683, GNorm = 0.3654, lr_0 = 7.1081e-04
Loss = 2.4858e-02, PNorm = 116.1188, GNorm = 0.2775, lr_0 = 7.1033e-04
Loss = 2.7703e-02, PNorm = 116.1645, GNorm = 0.1988, lr_0 = 7.0984e-04
Loss = 2.6601e-02, PNorm = 116.2113, GNorm = 0.1991, lr_0 = 7.0935e-04
Loss = 2.6139e-02, PNorm = 116.2588, GNorm = 0.5461, lr_0 = 7.0887e-04
Loss = 2.7738e-02, PNorm = 116.3130, GNorm = 0.5011, lr_0 = 7.0838e-04
Loss = 3.1353e-02, PNorm = 116.3661, GNorm = 0.3193, lr_0 = 7.0790e-04
Loss = 2.4713e-02, PNorm = 116.4038, GNorm = 0.2655, lr_0 = 7.0741e-04
Loss = 2.5163e-02, PNorm = 116.4517, GNorm = 0.3119, lr_0 = 7.0693e-04
Loss = 2.7699e-02, PNorm = 116.4920, GNorm = 0.3079, lr_0 = 7.0644e-04
Loss = 2.8408e-02, PNorm = 116.5350, GNorm = 0.3781, lr_0 = 7.0596e-04
Loss = 2.5437e-02, PNorm = 116.5825, GNorm = 0.2563, lr_0 = 7.0548e-04
Loss = 2.5834e-02, PNorm = 116.6263, GNorm = 0.2567, lr_0 = 7.0499e-04
Loss = 2.7071e-02, PNorm = 116.6714, GNorm = 0.5782, lr_0 = 7.0451e-04
Loss = 2.7799e-02, PNorm = 116.7192, GNorm = 0.5432, lr_0 = 7.0403e-04
Loss = 2.5743e-02, PNorm = 116.7613, GNorm = 0.6214, lr_0 = 7.0354e-04
Loss = 2.5351e-02, PNorm = 116.8057, GNorm = 0.3476, lr_0 = 7.0306e-04
Loss = 3.1362e-02, PNorm = 116.8506, GNorm = 0.3823, lr_0 = 7.0258e-04
Loss = 2.7770e-02, PNorm = 116.8994, GNorm = 0.4047, lr_0 = 7.0210e-04
Loss = 2.8188e-02, PNorm = 116.9469, GNorm = 0.3929, lr_0 = 7.0162e-04
Loss = 2.5881e-02, PNorm = 116.9824, GNorm = 0.4754, lr_0 = 7.0114e-04
Loss = 3.2448e-02, PNorm = 117.0334, GNorm = 0.4342, lr_0 = 7.0066e-04
Loss = 2.5988e-02, PNorm = 117.0715, GNorm = 0.7153, lr_0 = 7.0018e-04
Loss = 2.7434e-02, PNorm = 117.1184, GNorm = 0.4119, lr_0 = 6.9970e-04
Loss = 2.7023e-02, PNorm = 117.1577, GNorm = 0.3828, lr_0 = 6.9922e-04
Loss = 2.7980e-02, PNorm = 117.2055, GNorm = 0.7023, lr_0 = 6.9874e-04
Loss = 2.9139e-02, PNorm = 117.2524, GNorm = 0.8369, lr_0 = 6.9826e-04
Loss = 2.7504e-02, PNorm = 117.2925, GNorm = 0.6968, lr_0 = 6.9778e-04
Loss = 2.6915e-02, PNorm = 117.3398, GNorm = 0.2049, lr_0 = 6.9730e-04
Loss = 2.2954e-02, PNorm = 117.3931, GNorm = 0.5461, lr_0 = 6.9683e-04
Loss = 2.6512e-02, PNorm = 117.4428, GNorm = 0.3123, lr_0 = 6.9635e-04
Loss = 3.1729e-02, PNorm = 117.4875, GNorm = 0.4820, lr_0 = 6.9587e-04
Loss = 2.5827e-02, PNorm = 117.5458, GNorm = 0.3330, lr_0 = 6.9540e-04
Loss = 2.4514e-02, PNorm = 117.6002, GNorm = 0.4516, lr_0 = 6.9492e-04
Loss = 2.8644e-02, PNorm = 117.6510, GNorm = 0.2251, lr_0 = 6.9444e-04
Loss = 3.8040e-02, PNorm = 117.6984, GNorm = 0.4122, lr_0 = 6.9397e-04
Loss = 2.7530e-02, PNorm = 117.7550, GNorm = 0.6355, lr_0 = 6.9349e-04
Loss = 2.6524e-02, PNorm = 117.8152, GNorm = 0.1969, lr_0 = 6.9302e-04
Loss = 2.7656e-02, PNorm = 117.8642, GNorm = 0.2358, lr_0 = 6.9254e-04
Loss = 2.6362e-02, PNorm = 117.9093, GNorm = 0.3648, lr_0 = 6.9207e-04
Loss = 2.8186e-02, PNorm = 117.9622, GNorm = 0.2524, lr_0 = 6.9159e-04
Loss = 2.8149e-02, PNorm = 118.0182, GNorm = 0.3681, lr_0 = 6.9112e-04
Loss = 2.6601e-02, PNorm = 118.0643, GNorm = 0.1878, lr_0 = 6.9065e-04
Loss = 3.0309e-02, PNorm = 118.1108, GNorm = 0.3434, lr_0 = 6.9017e-04
Loss = 2.6143e-02, PNorm = 118.1638, GNorm = 0.2386, lr_0 = 6.8970e-04
Loss = 2.5462e-02, PNorm = 118.2133, GNorm = 0.3068, lr_0 = 6.8923e-04
Loss = 2.7018e-02, PNorm = 118.2629, GNorm = 0.4559, lr_0 = 6.8876e-04
Loss = 2.8275e-02, PNorm = 118.3106, GNorm = 0.4651, lr_0 = 6.8828e-04
Loss = 2.6154e-02, PNorm = 118.3660, GNorm = 0.4716, lr_0 = 6.8781e-04
Loss = 2.9524e-02, PNorm = 118.4220, GNorm = 0.2967, lr_0 = 6.8734e-04
Loss = 2.6793e-02, PNorm = 118.4743, GNorm = 0.3376, lr_0 = 6.8687e-04
Loss = 2.6309e-02, PNorm = 118.5286, GNorm = 0.4046, lr_0 = 6.8640e-04
Loss = 3.0755e-02, PNorm = 118.5802, GNorm = 0.4247, lr_0 = 6.8593e-04
Loss = 2.9982e-02, PNorm = 118.6409, GNorm = 0.6710, lr_0 = 6.8546e-04
Loss = 3.2284e-02, PNorm = 118.6922, GNorm = 0.3082, lr_0 = 6.8499e-04
Loss = 3.1590e-02, PNorm = 118.7440, GNorm = 0.3469, lr_0 = 6.8452e-04
Loss = 3.0225e-02, PNorm = 118.7976, GNorm = 0.4569, lr_0 = 6.8405e-04
Loss = 3.2145e-02, PNorm = 118.8509, GNorm = 0.6792, lr_0 = 6.8358e-04
Loss = 2.8365e-02, PNorm = 118.8974, GNorm = 0.3018, lr_0 = 6.8312e-04
Loss = 3.2821e-02, PNorm = 118.9506, GNorm = 0.2950, lr_0 = 6.8265e-04
Loss = 3.1659e-02, PNorm = 119.0137, GNorm = 0.2908, lr_0 = 6.8218e-04
Loss = 3.1860e-02, PNorm = 119.0698, GNorm = 0.2837, lr_0 = 6.8171e-04
Loss = 3.1622e-02, PNorm = 119.1310, GNorm = 0.8848, lr_0 = 6.8125e-04
Loss = 3.3983e-02, PNorm = 119.1890, GNorm = 0.4467, lr_0 = 6.8078e-04
Loss = 2.9880e-02, PNorm = 119.2509, GNorm = 0.2559, lr_0 = 6.8031e-04
Loss = 2.5688e-02, PNorm = 119.3048, GNorm = 0.2465, lr_0 = 6.7985e-04
Loss = 3.0522e-02, PNorm = 119.3589, GNorm = 0.7416, lr_0 = 6.7938e-04
Loss = 2.6374e-02, PNorm = 119.4169, GNorm = 0.3034, lr_0 = 6.7892e-04
Loss = 3.4347e-02, PNorm = 119.4767, GNorm = 0.4106, lr_0 = 6.7845e-04
Loss = 2.6566e-02, PNorm = 119.5364, GNorm = 0.3089, lr_0 = 6.7799e-04
Loss = 3.1015e-02, PNorm = 119.5952, GNorm = 0.4397, lr_0 = 6.7752e-04
Loss = 3.3235e-02, PNorm = 119.6588, GNorm = 0.3584, lr_0 = 6.7706e-04
Loss = 2.8115e-02, PNorm = 119.7174, GNorm = 0.1978, lr_0 = 6.7659e-04
Loss = 2.9156e-02, PNorm = 119.7737, GNorm = 0.4908, lr_0 = 6.7613e-04
Loss = 3.0481e-02, PNorm = 119.8294, GNorm = 0.5684, lr_0 = 6.7567e-04
Loss = 2.6660e-02, PNorm = 119.8910, GNorm = 0.4557, lr_0 = 6.7520e-04
Loss = 3.0703e-02, PNorm = 119.9543, GNorm = 0.2986, lr_0 = 6.7474e-04
Loss = 3.7209e-02, PNorm = 120.0093, GNorm = 0.4624, lr_0 = 6.7428e-04
Loss = 3.0109e-02, PNorm = 120.0735, GNorm = 0.4384, lr_0 = 6.7382e-04
Loss = 3.5207e-02, PNorm = 120.1415, GNorm = 0.3971, lr_0 = 6.7335e-04
Loss = 3.4154e-02, PNorm = 120.2116, GNorm = 0.5580, lr_0 = 6.7289e-04
Loss = 2.8030e-02, PNorm = 120.2702, GNorm = 0.2906, lr_0 = 6.7243e-04
Loss = 3.1316e-02, PNorm = 120.3247, GNorm = 0.6951, lr_0 = 6.7197e-04
Loss = 2.9317e-02, PNorm = 120.3851, GNorm = 0.6142, lr_0 = 6.7151e-04
Loss = 3.3382e-02, PNorm = 120.4495, GNorm = 0.3129, lr_0 = 6.7105e-04
Loss = 2.8540e-02, PNorm = 120.5102, GNorm = 0.3820, lr_0 = 6.7059e-04
Loss = 3.1007e-02, PNorm = 120.5749, GNorm = 0.2523, lr_0 = 6.7013e-04
Loss = 3.5681e-02, PNorm = 120.6290, GNorm = 0.5391, lr_0 = 6.6967e-04
Loss = 3.2494e-02, PNorm = 120.6836, GNorm = 0.1956, lr_0 = 6.6921e-04
Loss = 3.0408e-02, PNorm = 120.7417, GNorm = 0.3973, lr_0 = 6.6876e-04
Loss = 2.9192e-02, PNorm = 120.8032, GNorm = 0.4594, lr_0 = 6.6830e-04
Loss = 3.0936e-02, PNorm = 120.8602, GNorm = 0.2592, lr_0 = 6.6784e-04
Loss = 3.2750e-02, PNorm = 120.9262, GNorm = 0.2699, lr_0 = 6.6738e-04
Loss = 3.1401e-02, PNorm = 120.9917, GNorm = 0.3982, lr_0 = 6.6693e-04
Loss = 3.0440e-02, PNorm = 121.0519, GNorm = 0.1911, lr_0 = 6.6647e-04
Loss = 3.0128e-02, PNorm = 121.1110, GNorm = 0.1776, lr_0 = 6.6601e-04
Loss = 3.6588e-02, PNorm = 121.1668, GNorm = 0.3854, lr_0 = 6.6556e-04
Loss = 3.1073e-02, PNorm = 121.2352, GNorm = 0.2135, lr_0 = 6.6510e-04
Loss = 3.3864e-02, PNorm = 121.3002, GNorm = 0.5941, lr_0 = 6.6464e-04
Loss = 3.0306e-02, PNorm = 121.3605, GNorm = 0.2317, lr_0 = 6.6419e-04
Loss = 3.2729e-02, PNorm = 121.4229, GNorm = 0.8419, lr_0 = 6.6373e-04
Loss = 2.9548e-02, PNorm = 121.4873, GNorm = 0.4840, lr_0 = 6.6328e-04
Loss = 3.1481e-02, PNorm = 121.5425, GNorm = 0.3932, lr_0 = 6.6282e-04
Validation mae = 0.485236
Epoch 7
Loss = 2.4114e-02, PNorm = 121.5876, GNorm = 0.4564, lr_0 = 6.6237e-04
Loss = 2.6338e-02, PNorm = 121.6316, GNorm = 0.1776, lr_0 = 6.6192e-04
Loss = 2.1515e-02, PNorm = 121.6730, GNorm = 0.2675, lr_0 = 6.6146e-04
Loss = 2.2592e-02, PNorm = 121.7133, GNorm = 0.1911, lr_0 = 6.6101e-04
Loss = 2.4332e-02, PNorm = 121.7585, GNorm = 0.3835, lr_0 = 6.6056e-04
Loss = 2.2595e-02, PNorm = 121.7936, GNorm = 0.4669, lr_0 = 6.6011e-04
Loss = 2.3409e-02, PNorm = 121.8280, GNorm = 0.7761, lr_0 = 6.5965e-04
Loss = 2.3936e-02, PNorm = 121.8578, GNorm = 0.2244, lr_0 = 6.5920e-04
Loss = 2.0295e-02, PNorm = 121.8894, GNorm = 0.3356, lr_0 = 6.5875e-04
Loss = 2.2975e-02, PNorm = 121.9237, GNorm = 0.1812, lr_0 = 6.5830e-04
Loss = 2.3775e-02, PNorm = 121.9645, GNorm = 0.2554, lr_0 = 6.5785e-04
Loss = 2.3315e-02, PNorm = 122.0120, GNorm = 0.3176, lr_0 = 6.5740e-04
Loss = 2.3952e-02, PNorm = 122.0496, GNorm = 0.3936, lr_0 = 6.5695e-04
Loss = 2.3268e-02, PNorm = 122.0942, GNorm = 0.2303, lr_0 = 6.5650e-04
Loss = 2.0508e-02, PNorm = 122.1399, GNorm = 0.1857, lr_0 = 6.5605e-04
Loss = 2.2290e-02, PNorm = 122.1738, GNorm = 1.0591, lr_0 = 6.5560e-04
Loss = 1.9469e-02, PNorm = 122.2136, GNorm = 0.2392, lr_0 = 6.5515e-04
Loss = 2.1392e-02, PNorm = 122.2553, GNorm = 0.4596, lr_0 = 6.5470e-04
Loss = 2.1649e-02, PNorm = 122.2969, GNorm = 0.8797, lr_0 = 6.5425e-04
Loss = 2.0200e-02, PNorm = 122.3382, GNorm = 0.3406, lr_0 = 6.5380e-04
Loss = 2.0490e-02, PNorm = 122.3759, GNorm = 0.4454, lr_0 = 6.5335e-04
Loss = 2.3400e-02, PNorm = 122.4165, GNorm = 0.1528, lr_0 = 6.5291e-04
Loss = 1.9067e-02, PNorm = 122.4517, GNorm = 0.4460, lr_0 = 6.5246e-04
Loss = 2.1635e-02, PNorm = 122.4938, GNorm = 0.3247, lr_0 = 6.5201e-04
Loss = 2.3733e-02, PNorm = 122.5346, GNorm = 0.3005, lr_0 = 6.5157e-04
Loss = 2.1907e-02, PNorm = 122.5755, GNorm = 0.4727, lr_0 = 6.5112e-04
Loss = 2.2262e-02, PNorm = 122.6188, GNorm = 0.3018, lr_0 = 6.5067e-04
Loss = 2.2458e-02, PNorm = 122.6644, GNorm = 0.2227, lr_0 = 6.5023e-04
Loss = 2.0835e-02, PNorm = 122.7049, GNorm = 0.3504, lr_0 = 6.4978e-04
Loss = 2.0523e-02, PNorm = 122.7460, GNorm = 0.5581, lr_0 = 6.4934e-04
Loss = 2.0961e-02, PNorm = 122.7835, GNorm = 0.4548, lr_0 = 6.4889e-04
Loss = 2.0084e-02, PNorm = 122.8317, GNorm = 0.3405, lr_0 = 6.4845e-04
Loss = 2.2820e-02, PNorm = 122.8755, GNorm = 0.2235, lr_0 = 6.4800e-04
Loss = 2.2649e-02, PNorm = 122.9152, GNorm = 0.4346, lr_0 = 6.4756e-04
Loss = 2.5713e-02, PNorm = 122.9467, GNorm = 0.4092, lr_0 = 6.4712e-04
Loss = 1.9919e-02, PNorm = 122.9866, GNorm = 0.4515, lr_0 = 6.4667e-04
Loss = 2.0326e-02, PNorm = 123.0220, GNorm = 0.1991, lr_0 = 6.4623e-04
Loss = 2.3929e-02, PNorm = 123.0611, GNorm = 0.3604, lr_0 = 6.4579e-04
Loss = 2.0923e-02, PNorm = 123.1095, GNorm = 0.2650, lr_0 = 6.4534e-04
Loss = 1.8980e-02, PNorm = 123.1454, GNorm = 0.4608, lr_0 = 6.4490e-04
Loss = 2.1099e-02, PNorm = 123.1892, GNorm = 0.7264, lr_0 = 6.4446e-04
Loss = 2.3129e-02, PNorm = 123.2263, GNorm = 0.5478, lr_0 = 6.4402e-04
Loss = 2.2579e-02, PNorm = 123.2778, GNorm = 0.1874, lr_0 = 6.4358e-04
Loss = 2.2143e-02, PNorm = 123.3252, GNorm = 0.4731, lr_0 = 6.4314e-04
Loss = 2.3104e-02, PNorm = 123.3682, GNorm = 0.2061, lr_0 = 6.4270e-04
Loss = 2.0892e-02, PNorm = 123.4062, GNorm = 0.5249, lr_0 = 6.4226e-04
Loss = 2.3134e-02, PNorm = 123.4519, GNorm = 0.6459, lr_0 = 6.4182e-04
Loss = 2.2220e-02, PNorm = 123.4934, GNorm = 0.3381, lr_0 = 6.4138e-04
Loss = 2.1850e-02, PNorm = 123.5302, GNorm = 0.1974, lr_0 = 6.4094e-04
Loss = 2.1633e-02, PNorm = 123.5704, GNorm = 0.4639, lr_0 = 6.4050e-04
Loss = 2.4397e-02, PNorm = 123.6218, GNorm = 0.6202, lr_0 = 6.4006e-04
Loss = 2.1428e-02, PNorm = 123.6708, GNorm = 0.6740, lr_0 = 6.3962e-04
Loss = 2.5286e-02, PNorm = 123.7105, GNorm = 0.5538, lr_0 = 6.3918e-04
Loss = 2.4635e-02, PNorm = 123.7523, GNorm = 0.4616, lr_0 = 6.3874e-04
Loss = 2.5810e-02, PNorm = 123.7992, GNorm = 0.4400, lr_0 = 6.3831e-04
Loss = 2.1007e-02, PNorm = 123.8389, GNorm = 0.5899, lr_0 = 6.3787e-04
Loss = 2.0417e-02, PNorm = 123.8819, GNorm = 0.3341, lr_0 = 6.3743e-04
Loss = 2.0698e-02, PNorm = 123.9231, GNorm = 0.4006, lr_0 = 6.3700e-04
Loss = 2.3940e-02, PNorm = 123.9683, GNorm = 0.3176, lr_0 = 6.3656e-04
Loss = 2.1001e-02, PNorm = 124.0133, GNorm = 0.2412, lr_0 = 6.3612e-04
Loss = 2.0688e-02, PNorm = 124.0585, GNorm = 0.3158, lr_0 = 6.3569e-04
Loss = 2.5837e-02, PNorm = 124.1007, GNorm = 0.2621, lr_0 = 6.3525e-04
Loss = 1.9887e-02, PNorm = 124.1460, GNorm = 0.2378, lr_0 = 6.3482e-04
Loss = 2.1991e-02, PNorm = 124.1901, GNorm = 0.4346, lr_0 = 6.3438e-04
Loss = 2.2793e-02, PNorm = 124.2347, GNorm = 0.2321, lr_0 = 6.3395e-04
Loss = 2.0161e-02, PNorm = 124.2850, GNorm = 0.4905, lr_0 = 6.3351e-04
Loss = 2.2101e-02, PNorm = 124.3280, GNorm = 0.4160, lr_0 = 6.3308e-04
Loss = 2.7723e-02, PNorm = 124.3693, GNorm = 0.6168, lr_0 = 6.3265e-04
Loss = 2.4081e-02, PNorm = 124.4153, GNorm = 0.4146, lr_0 = 6.3221e-04
Loss = 2.5410e-02, PNorm = 124.4664, GNorm = 0.3985, lr_0 = 6.3178e-04
Loss = 2.3036e-02, PNorm = 124.5210, GNorm = 0.2785, lr_0 = 6.3135e-04
Loss = 2.2149e-02, PNorm = 124.5730, GNorm = 0.2405, lr_0 = 6.3091e-04
Loss = 2.3132e-02, PNorm = 124.6164, GNorm = 0.3907, lr_0 = 6.3048e-04
Loss = 2.4340e-02, PNorm = 124.6637, GNorm = 0.3485, lr_0 = 6.3005e-04
Loss = 2.5506e-02, PNorm = 124.7125, GNorm = 0.4267, lr_0 = 6.2962e-04
Loss = 2.2603e-02, PNorm = 124.7662, GNorm = 0.1382, lr_0 = 6.2919e-04
Loss = 3.1031e-02, PNorm = 124.8155, GNorm = 0.2897, lr_0 = 6.2876e-04
Loss = 2.3461e-02, PNorm = 124.8665, GNorm = 0.3024, lr_0 = 6.2833e-04
Loss = 2.1420e-02, PNorm = 124.9183, GNorm = 0.3351, lr_0 = 6.2789e-04
Loss = 2.4393e-02, PNorm = 124.9700, GNorm = 0.1743, lr_0 = 6.2746e-04
Loss = 2.6650e-02, PNorm = 125.0223, GNorm = 0.1371, lr_0 = 6.2703e-04
Loss = 2.1814e-02, PNorm = 125.0748, GNorm = 0.2831, lr_0 = 6.2661e-04
Loss = 2.1877e-02, PNorm = 125.1205, GNorm = 0.3785, lr_0 = 6.2618e-04
Loss = 2.5704e-02, PNorm = 125.1726, GNorm = 0.3069, lr_0 = 6.2575e-04
Loss = 2.1415e-02, PNorm = 125.2256, GNorm = 0.2879, lr_0 = 6.2532e-04
Loss = 2.5351e-02, PNorm = 125.2798, GNorm = 0.1986, lr_0 = 6.2489e-04
Loss = 2.1917e-02, PNorm = 125.3330, GNorm = 0.3074, lr_0 = 6.2446e-04
Loss = 2.2906e-02, PNorm = 125.3838, GNorm = 0.6256, lr_0 = 6.2403e-04
Loss = 2.0570e-02, PNorm = 125.4304, GNorm = 0.2453, lr_0 = 6.2361e-04
Loss = 2.0852e-02, PNorm = 125.4747, GNorm = 0.5587, lr_0 = 6.2318e-04
Loss = 2.4972e-02, PNorm = 125.5238, GNorm = 0.9014, lr_0 = 6.2275e-04
Loss = 2.5188e-02, PNorm = 125.5761, GNorm = 0.2199, lr_0 = 6.2233e-04
Loss = 2.1751e-02, PNorm = 125.6351, GNorm = 0.5795, lr_0 = 6.2190e-04
Loss = 2.8607e-02, PNorm = 125.6865, GNorm = 0.3418, lr_0 = 6.2147e-04
Loss = 2.4911e-02, PNorm = 125.7338, GNorm = 0.5237, lr_0 = 6.2105e-04
Loss = 3.1534e-02, PNorm = 125.7836, GNorm = 0.2298, lr_0 = 6.2062e-04
Loss = 2.3147e-02, PNorm = 125.8288, GNorm = 0.8088, lr_0 = 6.2020e-04
Loss = 2.9118e-02, PNorm = 125.8844, GNorm = 0.8392, lr_0 = 6.1977e-04
Loss = 2.0233e-02, PNorm = 125.9406, GNorm = 0.4860, lr_0 = 6.1935e-04
Loss = 2.2745e-02, PNorm = 125.9919, GNorm = 0.2149, lr_0 = 6.1892e-04
Loss = 2.1617e-02, PNorm = 126.0395, GNorm = 0.2290, lr_0 = 6.1850e-04
Loss = 2.2972e-02, PNorm = 126.0853, GNorm = 0.3919, lr_0 = 6.1808e-04
Loss = 2.4971e-02, PNorm = 126.1285, GNorm = 0.3505, lr_0 = 6.1765e-04
Loss = 2.1347e-02, PNorm = 126.1736, GNorm = 0.8042, lr_0 = 6.1723e-04
Loss = 2.0978e-02, PNorm = 126.2225, GNorm = 0.2695, lr_0 = 6.1681e-04
Loss = 2.3989e-02, PNorm = 126.2757, GNorm = 0.3710, lr_0 = 6.1638e-04
Loss = 2.2851e-02, PNorm = 126.3342, GNorm = 0.1699, lr_0 = 6.1596e-04
Loss = 2.2421e-02, PNorm = 126.3901, GNorm = 0.1655, lr_0 = 6.1554e-04
Loss = 2.4263e-02, PNorm = 126.4457, GNorm = 0.3842, lr_0 = 6.1512e-04
Loss = 2.0629e-02, PNorm = 126.4981, GNorm = 0.1896, lr_0 = 6.1470e-04
Loss = 2.1987e-02, PNorm = 126.5436, GNorm = 0.4273, lr_0 = 6.1428e-04
Loss = 2.4977e-02, PNorm = 126.5912, GNorm = 0.3329, lr_0 = 6.1385e-04
Loss = 2.2004e-02, PNorm = 126.6513, GNorm = 0.3321, lr_0 = 6.1343e-04
Loss = 2.6615e-02, PNorm = 126.7026, GNorm = 0.1711, lr_0 = 6.1301e-04
Loss = 2.9290e-02, PNorm = 126.7507, GNorm = 0.2998, lr_0 = 6.1259e-04
Loss = 2.2523e-02, PNorm = 126.8079, GNorm = 0.5879, lr_0 = 6.1217e-04
Loss = 2.2942e-02, PNorm = 126.8576, GNorm = 0.2493, lr_0 = 6.1175e-04
Loss = 2.5395e-02, PNorm = 126.9103, GNorm = 0.3708, lr_0 = 6.1134e-04
Loss = 2.2201e-02, PNorm = 126.9574, GNorm = 0.4442, lr_0 = 6.1092e-04
Loss = 2.0336e-02, PNorm = 127.0005, GNorm = 0.2938, lr_0 = 6.1050e-04
Validation mae = 0.484332
Epoch 8
Loss = 2.4984e-02, PNorm = 127.0358, GNorm = 0.4368, lr_0 = 6.1008e-04
Loss = 1.9742e-02, PNorm = 127.0718, GNorm = 0.2299, lr_0 = 6.0966e-04
Loss = 2.6653e-02, PNorm = 127.1098, GNorm = 0.6797, lr_0 = 6.0924e-04
Loss = 2.0657e-02, PNorm = 127.1470, GNorm = 0.5380, lr_0 = 6.0883e-04
Loss = 2.3553e-02, PNorm = 127.1868, GNorm = 0.6761, lr_0 = 6.0841e-04
Loss = 2.1473e-02, PNorm = 127.2207, GNorm = 0.3517, lr_0 = 6.0799e-04
Loss = 1.8525e-02, PNorm = 127.2531, GNorm = 0.1557, lr_0 = 6.0758e-04
Loss = 1.7789e-02, PNorm = 127.2837, GNorm = 0.2622, lr_0 = 6.0716e-04
Loss = 1.7614e-02, PNorm = 127.3192, GNorm = 0.3023, lr_0 = 6.0674e-04
Loss = 1.8780e-02, PNorm = 127.3591, GNorm = 0.2192, lr_0 = 6.0633e-04
Loss = 1.9178e-02, PNorm = 127.3887, GNorm = 0.4268, lr_0 = 6.0591e-04
Loss = 1.7727e-02, PNorm = 127.4181, GNorm = 0.2084, lr_0 = 6.0550e-04
Loss = 1.8305e-02, PNorm = 127.4469, GNorm = 0.3369, lr_0 = 6.0508e-04
Loss = 1.9117e-02, PNorm = 127.4825, GNorm = 0.2215, lr_0 = 6.0467e-04
Loss = 1.8524e-02, PNorm = 127.5205, GNorm = 0.2521, lr_0 = 6.0425e-04
Loss = 2.0069e-02, PNorm = 127.5547, GNorm = 0.2304, lr_0 = 6.0384e-04
Loss = 1.8254e-02, PNorm = 127.5864, GNorm = 0.6224, lr_0 = 6.0343e-04
Loss = 1.7314e-02, PNorm = 127.6192, GNorm = 0.7357, lr_0 = 6.0301e-04
Loss = 1.5658e-02, PNorm = 127.6511, GNorm = 0.1657, lr_0 = 6.0260e-04
Loss = 1.6028e-02, PNorm = 127.6787, GNorm = 0.4174, lr_0 = 6.0219e-04
Loss = 1.6403e-02, PNorm = 127.7086, GNorm = 0.4622, lr_0 = 6.0178e-04
Loss = 1.6773e-02, PNorm = 127.7479, GNorm = 0.5581, lr_0 = 6.0136e-04
Loss = 1.8861e-02, PNorm = 127.7829, GNorm = 0.3693, lr_0 = 6.0095e-04
Loss = 1.6523e-02, PNorm = 127.8250, GNorm = 0.3219, lr_0 = 6.0054e-04
Loss = 1.6952e-02, PNorm = 127.8593, GNorm = 0.3848, lr_0 = 6.0013e-04
Loss = 1.7574e-02, PNorm = 127.8974, GNorm = 0.5465, lr_0 = 5.9972e-04
Loss = 2.0304e-02, PNorm = 127.9341, GNorm = 0.4523, lr_0 = 5.9931e-04
Loss = 1.8155e-02, PNorm = 127.9751, GNorm = 0.1290, lr_0 = 5.9890e-04
Loss = 1.7091e-02, PNorm = 128.0066, GNorm = 0.2150, lr_0 = 5.9849e-04
Loss = 1.5537e-02, PNorm = 128.0380, GNorm = 0.2448, lr_0 = 5.9808e-04
Loss = 1.7470e-02, PNorm = 128.0662, GNorm = 0.2947, lr_0 = 5.9767e-04
Loss = 1.8169e-02, PNorm = 128.1042, GNorm = 0.1421, lr_0 = 5.9726e-04
Loss = 1.8560e-02, PNorm = 128.1389, GNorm = 0.4297, lr_0 = 5.9685e-04
Loss = 1.7584e-02, PNorm = 128.1810, GNorm = 0.3059, lr_0 = 5.9644e-04
Loss = 1.8894e-02, PNorm = 128.2185, GNorm = 0.1767, lr_0 = 5.9603e-04
Loss = 1.8406e-02, PNorm = 128.2537, GNorm = 0.2805, lr_0 = 5.9562e-04
Loss = 1.9118e-02, PNorm = 128.2941, GNorm = 0.2684, lr_0 = 5.9521e-04
Loss = 2.0532e-02, PNorm = 128.3350, GNorm = 0.4257, lr_0 = 5.9481e-04
Loss = 1.6291e-02, PNorm = 128.3748, GNorm = 0.3317, lr_0 = 5.9440e-04
Loss = 1.6738e-02, PNorm = 128.4103, GNorm = 0.1977, lr_0 = 5.9399e-04
Loss = 1.9408e-02, PNorm = 128.4478, GNorm = 0.1754, lr_0 = 5.9358e-04
Loss = 1.8810e-02, PNorm = 128.4854, GNorm = 0.2345, lr_0 = 5.9318e-04
Loss = 1.6886e-02, PNorm = 128.5201, GNorm = 0.3127, lr_0 = 5.9277e-04
Loss = 1.8815e-02, PNorm = 128.5524, GNorm = 0.3716, lr_0 = 5.9236e-04
Loss = 1.6594e-02, PNorm = 128.5894, GNorm = 0.1211, lr_0 = 5.9196e-04
Loss = 1.9060e-02, PNorm = 128.6312, GNorm = 0.1355, lr_0 = 5.9155e-04
Loss = 1.9861e-02, PNorm = 128.6637, GNorm = 0.2001, lr_0 = 5.9115e-04
Loss = 2.0792e-02, PNorm = 128.7011, GNorm = 0.1756, lr_0 = 5.9074e-04
Loss = 1.9470e-02, PNorm = 128.7307, GNorm = 0.4952, lr_0 = 5.9034e-04
Loss = 1.6898e-02, PNorm = 128.7657, GNorm = 0.1333, lr_0 = 5.8993e-04
Loss = 1.7227e-02, PNorm = 128.7993, GNorm = 0.4177, lr_0 = 5.8953e-04
Loss = 1.9208e-02, PNorm = 128.8412, GNorm = 0.7862, lr_0 = 5.8913e-04
Loss = 1.8244e-02, PNorm = 128.8794, GNorm = 0.5754, lr_0 = 5.8872e-04
Loss = 1.8277e-02, PNorm = 128.9210, GNorm = 0.5319, lr_0 = 5.8832e-04
Loss = 1.7094e-02, PNorm = 128.9657, GNorm = 0.5265, lr_0 = 5.8792e-04
Loss = 2.1663e-02, PNorm = 129.0060, GNorm = 0.2032, lr_0 = 5.8751e-04
Loss = 1.6666e-02, PNorm = 129.0501, GNorm = 0.2170, lr_0 = 5.8711e-04
Loss = 1.6518e-02, PNorm = 129.0807, GNorm = 0.4265, lr_0 = 5.8671e-04
Loss = 1.6231e-02, PNorm = 129.1159, GNorm = 0.2783, lr_0 = 5.8631e-04
Loss = 1.7382e-02, PNorm = 129.1558, GNorm = 0.4269, lr_0 = 5.8591e-04
Loss = 1.8695e-02, PNorm = 129.1991, GNorm = 0.1690, lr_0 = 5.8550e-04
Loss = 1.8780e-02, PNorm = 129.2418, GNorm = 0.2604, lr_0 = 5.8510e-04
Loss = 1.9689e-02, PNorm = 129.2864, GNorm = 0.2580, lr_0 = 5.8470e-04
Loss = 1.6588e-02, PNorm = 129.3230, GNorm = 0.2596, lr_0 = 5.8430e-04
Loss = 1.7383e-02, PNorm = 129.3569, GNorm = 0.2813, lr_0 = 5.8390e-04
Loss = 1.7629e-02, PNorm = 129.3960, GNorm = 0.5372, lr_0 = 5.8350e-04
Loss = 1.7599e-02, PNorm = 129.4349, GNorm = 0.1709, lr_0 = 5.8310e-04
Loss = 1.9454e-02, PNorm = 129.4752, GNorm = 0.7280, lr_0 = 5.8270e-04
Loss = 1.8094e-02, PNorm = 129.5185, GNorm = 0.3620, lr_0 = 5.8230e-04
Loss = 1.5788e-02, PNorm = 129.5606, GNorm = 0.2315, lr_0 = 5.8190e-04
Loss = 1.8710e-02, PNorm = 129.5992, GNorm = 0.2015, lr_0 = 5.8151e-04
Loss = 1.4317e-02, PNorm = 129.6334, GNorm = 0.4584, lr_0 = 5.8111e-04
Loss = 1.7567e-02, PNorm = 129.6660, GNorm = 0.5461, lr_0 = 5.8071e-04
Loss = 2.7123e-02, PNorm = 129.6977, GNorm = 0.6028, lr_0 = 5.8031e-04
Loss = 1.9633e-02, PNorm = 129.7334, GNorm = 0.5303, lr_0 = 5.7991e-04
Loss = 2.0425e-02, PNorm = 129.7780, GNorm = 0.2020, lr_0 = 5.7952e-04
Loss = 1.6320e-02, PNorm = 129.8233, GNorm = 0.2650, lr_0 = 5.7912e-04
Loss = 1.9695e-02, PNorm = 129.8676, GNorm = 0.2710, lr_0 = 5.7872e-04
Loss = 1.8036e-02, PNorm = 129.9102, GNorm = 0.1646, lr_0 = 5.7833e-04
Loss = 1.8441e-02, PNorm = 129.9473, GNorm = 0.4926, lr_0 = 5.7793e-04
Loss = 1.5947e-02, PNorm = 129.9875, GNorm = 0.2751, lr_0 = 5.7753e-04
Loss = 1.5595e-02, PNorm = 130.0283, GNorm = 0.3877, lr_0 = 5.7714e-04
Loss = 1.7246e-02, PNorm = 130.0684, GNorm = 0.4129, lr_0 = 5.7674e-04
Loss = 1.6882e-02, PNorm = 130.1090, GNorm = 0.1693, lr_0 = 5.7635e-04
Loss = 1.9785e-02, PNorm = 130.1400, GNorm = 0.1730, lr_0 = 5.7595e-04
Loss = 1.4338e-02, PNorm = 130.1750, GNorm = 0.2175, lr_0 = 5.7556e-04
Loss = 1.7249e-02, PNorm = 130.2119, GNorm = 0.3455, lr_0 = 5.7516e-04
Loss = 1.7344e-02, PNorm = 130.2487, GNorm = 0.1568, lr_0 = 5.7477e-04
Loss = 1.8037e-02, PNorm = 130.2901, GNorm = 0.4119, lr_0 = 5.7438e-04
Loss = 1.8408e-02, PNorm = 130.3296, GNorm = 0.8493, lr_0 = 5.7398e-04
Loss = 2.0810e-02, PNorm = 130.3683, GNorm = 0.1748, lr_0 = 5.7359e-04
Loss = 1.8211e-02, PNorm = 130.4045, GNorm = 0.3048, lr_0 = 5.7320e-04
Loss = 2.0836e-02, PNorm = 130.4412, GNorm = 0.2273, lr_0 = 5.7280e-04
Loss = 1.8050e-02, PNorm = 130.4809, GNorm = 0.5682, lr_0 = 5.7241e-04
Loss = 1.9044e-02, PNorm = 130.5190, GNorm = 0.3508, lr_0 = 5.7202e-04
Loss = 1.9693e-02, PNorm = 130.5617, GNorm = 0.3732, lr_0 = 5.7163e-04
Loss = 1.9544e-02, PNorm = 130.6118, GNorm = 0.7103, lr_0 = 5.7124e-04
Loss = 2.1943e-02, PNorm = 130.6551, GNorm = 0.4548, lr_0 = 5.7084e-04
Loss = 1.7751e-02, PNorm = 130.7003, GNorm = 0.5538, lr_0 = 5.7045e-04
Loss = 1.6909e-02, PNorm = 130.7383, GNorm = 0.2244, lr_0 = 5.7006e-04
Loss = 1.7704e-02, PNorm = 130.7789, GNorm = 0.1859, lr_0 = 5.6967e-04
Loss = 2.1283e-02, PNorm = 130.8161, GNorm = 0.3743, lr_0 = 5.6928e-04
Loss = 1.7076e-02, PNorm = 130.8572, GNorm = 0.1566, lr_0 = 5.6889e-04
Loss = 2.2340e-02, PNorm = 130.8935, GNorm = 0.1985, lr_0 = 5.6850e-04
Loss = 1.6169e-02, PNorm = 130.9397, GNorm = 0.2896, lr_0 = 5.6811e-04
Loss = 1.9259e-02, PNorm = 130.9812, GNorm = 0.8642, lr_0 = 5.6772e-04
Loss = 1.6124e-02, PNorm = 131.0213, GNorm = 0.3105, lr_0 = 5.6733e-04
Loss = 1.9743e-02, PNorm = 131.0592, GNorm = 0.2687, lr_0 = 5.6695e-04
Loss = 1.5861e-02, PNorm = 131.1023, GNorm = 0.1482, lr_0 = 5.6656e-04
Loss = 1.7867e-02, PNorm = 131.1378, GNorm = 0.2702, lr_0 = 5.6617e-04
Loss = 2.1366e-02, PNorm = 131.1854, GNorm = 0.8057, lr_0 = 5.6578e-04
Loss = 2.4480e-02, PNorm = 131.2312, GNorm = 0.2770, lr_0 = 5.6539e-04
Loss = 1.7503e-02, PNorm = 131.2736, GNorm = 0.2740, lr_0 = 5.6501e-04
Loss = 1.8262e-02, PNorm = 131.3179, GNorm = 0.4735, lr_0 = 5.6462e-04
Loss = 1.8612e-02, PNorm = 131.3593, GNorm = 0.1673, lr_0 = 5.6423e-04
Loss = 2.0561e-02, PNorm = 131.4014, GNorm = 0.2743, lr_0 = 5.6385e-04
Loss = 1.8188e-02, PNorm = 131.4481, GNorm = 0.3071, lr_0 = 5.6346e-04
Loss = 1.6928e-02, PNorm = 131.4955, GNorm = 0.4936, lr_0 = 5.6307e-04
Loss = 1.8207e-02, PNorm = 131.5279, GNorm = 0.4659, lr_0 = 5.6269e-04
Loss = 2.1191e-02, PNorm = 131.5661, GNorm = 0.2099, lr_0 = 5.6230e-04
Validation mae = 0.486038
Epoch 9
Loss = 1.7766e-02, PNorm = 131.6054, GNorm = 0.2507, lr_0 = 5.6192e-04
Loss = 1.4573e-02, PNorm = 131.6383, GNorm = 0.6150, lr_0 = 5.6153e-04
Loss = 1.9058e-02, PNorm = 131.6658, GNorm = 0.4552, lr_0 = 5.6115e-04
Loss = 1.7732e-02, PNorm = 131.6982, GNorm = 0.3722, lr_0 = 5.6076e-04
Loss = 1.7353e-02, PNorm = 131.7206, GNorm = 0.4450, lr_0 = 5.6038e-04
Loss = 1.4474e-02, PNorm = 131.7533, GNorm = 0.3762, lr_0 = 5.6000e-04
Loss = 1.5538e-02, PNorm = 131.7843, GNorm = 0.1945, lr_0 = 5.5961e-04
Loss = 1.6736e-02, PNorm = 131.8134, GNorm = 0.2702, lr_0 = 5.5923e-04
Loss = 1.6531e-02, PNorm = 131.8377, GNorm = 0.1507, lr_0 = 5.5885e-04
Loss = 1.4983e-02, PNorm = 131.8640, GNorm = 0.4174, lr_0 = 5.5846e-04
Loss = 1.3623e-02, PNorm = 131.8947, GNorm = 0.2508, lr_0 = 5.5808e-04
Loss = 1.4059e-02, PNorm = 131.9247, GNorm = 0.3419, lr_0 = 5.5770e-04
Loss = 1.4820e-02, PNorm = 131.9585, GNorm = 0.3082, lr_0 = 5.5732e-04
Loss = 1.4804e-02, PNorm = 131.9888, GNorm = 0.1741, lr_0 = 5.5693e-04
Loss = 1.4594e-02, PNorm = 132.0183, GNorm = 0.2047, lr_0 = 5.5655e-04
Loss = 1.4596e-02, PNorm = 132.0473, GNorm = 0.2788, lr_0 = 5.5617e-04
Loss = 1.3153e-02, PNorm = 132.0731, GNorm = 0.1239, lr_0 = 5.5579e-04
Loss = 1.4613e-02, PNorm = 132.1027, GNorm = 0.4423, lr_0 = 5.5541e-04
Loss = 1.4106e-02, PNorm = 132.1255, GNorm = 0.2875, lr_0 = 5.5503e-04
Loss = 1.2684e-02, PNorm = 132.1593, GNorm = 0.2556, lr_0 = 5.5465e-04
Loss = 1.3289e-02, PNorm = 132.1906, GNorm = 0.2237, lr_0 = 5.5427e-04
Loss = 1.4384e-02, PNorm = 132.2186, GNorm = 0.4259, lr_0 = 5.5389e-04
Loss = 1.3966e-02, PNorm = 132.2498, GNorm = 0.2301, lr_0 = 5.5351e-04
Loss = 1.2826e-02, PNorm = 132.2800, GNorm = 0.1274, lr_0 = 5.5313e-04
Loss = 1.5998e-02, PNorm = 132.3030, GNorm = 0.1860, lr_0 = 5.5275e-04
Loss = 1.2387e-02, PNorm = 132.3292, GNorm = 0.4010, lr_0 = 5.5237e-04
Loss = 1.4407e-02, PNorm = 132.3566, GNorm = 0.1686, lr_0 = 5.5199e-04
Loss = 1.6937e-02, PNorm = 132.3862, GNorm = 0.4195, lr_0 = 5.5162e-04
Loss = 1.4429e-02, PNorm = 132.4219, GNorm = 0.2817, lr_0 = 5.5124e-04
Loss = 1.4253e-02, PNorm = 132.4537, GNorm = 0.1760, lr_0 = 5.5086e-04
Loss = 1.4601e-02, PNorm = 132.4809, GNorm = 0.2560, lr_0 = 5.5048e-04
Loss = 1.4223e-02, PNorm = 132.5049, GNorm = 0.3026, lr_0 = 5.5011e-04
Loss = 1.2737e-02, PNorm = 132.5319, GNorm = 0.2846, lr_0 = 5.4973e-04
Loss = 1.4011e-02, PNorm = 132.5678, GNorm = 0.3845, lr_0 = 5.4935e-04
Loss = 1.5475e-02, PNorm = 132.5949, GNorm = 0.7283, lr_0 = 5.4898e-04
Loss = 1.3985e-02, PNorm = 132.6174, GNorm = 0.2599, lr_0 = 5.4860e-04
Loss = 1.3058e-02, PNorm = 132.6466, GNorm = 0.1174, lr_0 = 5.4822e-04
Loss = 1.2480e-02, PNorm = 132.6775, GNorm = 0.2798, lr_0 = 5.4785e-04
Loss = 1.4090e-02, PNorm = 132.6976, GNorm = 0.2990, lr_0 = 5.4747e-04
Loss = 1.5430e-02, PNorm = 132.7227, GNorm = 0.2693, lr_0 = 5.4710e-04
Loss = 1.4359e-02, PNorm = 132.7527, GNorm = 0.2474, lr_0 = 5.4672e-04
Loss = 1.5813e-02, PNorm = 132.7835, GNorm = 0.5344, lr_0 = 5.4635e-04
Loss = 1.5552e-02, PNorm = 132.8177, GNorm = 0.3095, lr_0 = 5.4597e-04
Loss = 1.3654e-02, PNorm = 132.8514, GNorm = 0.1684, lr_0 = 5.4560e-04
Loss = 1.5655e-02, PNorm = 132.8833, GNorm = 0.1724, lr_0 = 5.4523e-04
Loss = 1.5828e-02, PNorm = 132.9163, GNorm = 0.2072, lr_0 = 5.4485e-04
Loss = 1.3970e-02, PNorm = 132.9474, GNorm = 0.3617, lr_0 = 5.4448e-04
Loss = 1.4692e-02, PNorm = 132.9815, GNorm = 0.5144, lr_0 = 5.4411e-04
Loss = 1.5247e-02, PNorm = 133.0144, GNorm = 0.4823, lr_0 = 5.4373e-04
Loss = 1.4039e-02, PNorm = 133.0484, GNorm = 0.2882, lr_0 = 5.4336e-04
Loss = 1.7912e-02, PNorm = 133.0852, GNorm = 0.2971, lr_0 = 5.4299e-04
Loss = 1.3822e-02, PNorm = 133.1220, GNorm = 0.2401, lr_0 = 5.4262e-04
Loss = 1.4551e-02, PNorm = 133.1529, GNorm = 0.5519, lr_0 = 5.4225e-04
Loss = 1.6586e-02, PNorm = 133.1847, GNorm = 0.3221, lr_0 = 5.4187e-04
Loss = 1.3999e-02, PNorm = 133.2134, GNorm = 0.1830, lr_0 = 5.4150e-04
Loss = 1.3051e-02, PNorm = 133.2450, GNorm = 0.1964, lr_0 = 5.4113e-04
Loss = 1.2290e-02, PNorm = 133.2696, GNorm = 0.5435, lr_0 = 5.4076e-04
Loss = 1.4094e-02, PNorm = 133.3045, GNorm = 0.2893, lr_0 = 5.4039e-04
Loss = 1.2999e-02, PNorm = 133.3420, GNorm = 0.2192, lr_0 = 5.4002e-04
Loss = 1.5591e-02, PNorm = 133.3734, GNorm = 0.1285, lr_0 = 5.3965e-04
Loss = 1.2490e-02, PNorm = 133.4023, GNorm = 0.2392, lr_0 = 5.3928e-04
Loss = 1.4304e-02, PNorm = 133.4307, GNorm = 0.5988, lr_0 = 5.3891e-04
Loss = 1.5262e-02, PNorm = 133.4625, GNorm = 0.4768, lr_0 = 5.3854e-04
Loss = 1.4660e-02, PNorm = 133.5061, GNorm = 0.2709, lr_0 = 5.3817e-04
Loss = 1.3479e-02, PNorm = 133.5411, GNorm = 0.3134, lr_0 = 5.3781e-04
Loss = 1.4226e-02, PNorm = 133.5705, GNorm = 0.1423, lr_0 = 5.3744e-04
Loss = 1.3596e-02, PNorm = 133.5990, GNorm = 0.1423, lr_0 = 5.3707e-04
Loss = 1.4425e-02, PNorm = 133.6290, GNorm = 0.2279, lr_0 = 5.3670e-04
Loss = 1.7111e-02, PNorm = 133.6622, GNorm = 0.2367, lr_0 = 5.3633e-04
Loss = 1.3131e-02, PNorm = 133.7010, GNorm = 0.3107, lr_0 = 5.3597e-04
Loss = 1.2610e-02, PNorm = 133.7392, GNorm = 0.3316, lr_0 = 5.3560e-04
Loss = 1.4734e-02, PNorm = 133.7708, GNorm = 0.2786, lr_0 = 5.3523e-04
Loss = 1.2222e-02, PNorm = 133.8038, GNorm = 0.2385, lr_0 = 5.3486e-04
Loss = 1.5889e-02, PNorm = 133.8364, GNorm = 0.3111, lr_0 = 5.3450e-04
Loss = 1.5742e-02, PNorm = 133.8671, GNorm = 0.2182, lr_0 = 5.3413e-04
Loss = 1.7389e-02, PNorm = 133.8995, GNorm = 0.3415, lr_0 = 5.3377e-04
Loss = 1.7845e-02, PNorm = 133.9367, GNorm = 0.2143, lr_0 = 5.3340e-04
Loss = 1.2505e-02, PNorm = 133.9711, GNorm = 0.3469, lr_0 = 5.3304e-04
Loss = 1.4526e-02, PNorm = 134.0038, GNorm = 0.3459, lr_0 = 5.3267e-04
Loss = 1.7340e-02, PNorm = 134.0425, GNorm = 0.1384, lr_0 = 5.3231e-04
Loss = 1.3161e-02, PNorm = 134.0796, GNorm = 0.4248, lr_0 = 5.3194e-04
Loss = 1.3631e-02, PNorm = 134.1142, GNorm = 0.2874, lr_0 = 5.3158e-04
Loss = 1.3501e-02, PNorm = 134.1456, GNorm = 0.3165, lr_0 = 5.3121e-04
Loss = 1.5459e-02, PNorm = 134.1767, GNorm = 0.6055, lr_0 = 5.3085e-04
Loss = 1.3808e-02, PNorm = 134.2104, GNorm = 0.4877, lr_0 = 5.3048e-04
Loss = 1.7536e-02, PNorm = 134.2447, GNorm = 0.2817, lr_0 = 5.3012e-04
Loss = 1.3268e-02, PNorm = 134.2719, GNorm = 0.4738, lr_0 = 5.2976e-04
Loss = 1.3010e-02, PNorm = 134.3029, GNorm = 0.2981, lr_0 = 5.2939e-04
Loss = 1.6098e-02, PNorm = 134.3416, GNorm = 0.3260, lr_0 = 5.2903e-04
Loss = 1.4902e-02, PNorm = 134.3773, GNorm = 0.1334, lr_0 = 5.2867e-04
Loss = 1.4155e-02, PNorm = 134.4075, GNorm = 0.1891, lr_0 = 5.2831e-04
Loss = 1.6382e-02, PNorm = 134.4403, GNorm = 0.6977, lr_0 = 5.2795e-04
Loss = 1.5215e-02, PNorm = 134.4778, GNorm = 0.2800, lr_0 = 5.2758e-04
Loss = 1.4264e-02, PNorm = 134.5111, GNorm = 0.3330, lr_0 = 5.2722e-04
Loss = 1.4209e-02, PNorm = 134.5407, GNorm = 0.4107, lr_0 = 5.2686e-04
Loss = 1.3712e-02, PNorm = 134.5756, GNorm = 0.4133, lr_0 = 5.2650e-04
Loss = 1.6074e-02, PNorm = 134.6094, GNorm = 0.6955, lr_0 = 5.2614e-04
Loss = 1.8199e-02, PNorm = 134.6418, GNorm = 0.6461, lr_0 = 5.2578e-04
Loss = 1.5904e-02, PNorm = 134.6722, GNorm = 0.8263, lr_0 = 5.2542e-04
Loss = 2.1653e-02, PNorm = 134.7062, GNorm = 0.3056, lr_0 = 5.2506e-04
Loss = 1.7106e-02, PNorm = 134.7434, GNorm = 0.4091, lr_0 = 5.2470e-04
Loss = 1.8320e-02, PNorm = 134.7846, GNorm = 0.2532, lr_0 = 5.2434e-04
Loss = 1.3473e-02, PNorm = 134.8238, GNorm = 0.1542, lr_0 = 5.2398e-04
Loss = 1.2873e-02, PNorm = 134.8575, GNorm = 0.4035, lr_0 = 5.2362e-04
Loss = 1.2000e-02, PNorm = 134.8931, GNorm = 0.3521, lr_0 = 5.2326e-04
Loss = 1.6290e-02, PNorm = 134.9247, GNorm = 0.1766, lr_0 = 5.2290e-04
Loss = 1.4850e-02, PNorm = 134.9557, GNorm = 0.4340, lr_0 = 5.2255e-04
Loss = 1.3962e-02, PNorm = 134.9940, GNorm = 0.1979, lr_0 = 5.2219e-04
Loss = 1.7506e-02, PNorm = 135.0334, GNorm = 0.3254, lr_0 = 5.2183e-04
Loss = 1.4229e-02, PNorm = 135.0703, GNorm = 0.1138, lr_0 = 5.2147e-04
Loss = 1.2971e-02, PNorm = 135.1021, GNorm = 0.1822, lr_0 = 5.2112e-04
Loss = 1.6805e-02, PNorm = 135.1364, GNorm = 0.3558, lr_0 = 5.2076e-04
Loss = 1.3163e-02, PNorm = 135.1704, GNorm = 0.1956, lr_0 = 5.2040e-04
Loss = 1.9690e-02, PNorm = 135.2020, GNorm = 1.1348, lr_0 = 5.2005e-04
Loss = 1.3881e-02, PNorm = 135.2314, GNorm = 0.2336, lr_0 = 5.1969e-04
Loss = 1.5613e-02, PNorm = 135.2667, GNorm = 0.6661, lr_0 = 5.1933e-04
Loss = 1.5552e-02, PNorm = 135.3043, GNorm = 0.5387, lr_0 = 5.1898e-04
Loss = 1.5938e-02, PNorm = 135.3406, GNorm = 0.2958, lr_0 = 5.1862e-04
Loss = 1.5667e-02, PNorm = 135.3746, GNorm = 0.1799, lr_0 = 5.1827e-04
Loss = 1.6834e-02, PNorm = 135.4065, GNorm = 0.2511, lr_0 = 5.1791e-04
Validation mae = 0.481033
Epoch 10
Loss = 1.4466e-02, PNorm = 135.4368, GNorm = 0.1335, lr_0 = 5.1756e-04
Loss = 1.3506e-02, PNorm = 135.4653, GNorm = 0.5037, lr_0 = 5.1720e-04
Loss = 1.5660e-02, PNorm = 135.4927, GNorm = 0.2848, lr_0 = 5.1685e-04
Loss = 1.0624e-02, PNorm = 135.5157, GNorm = 0.1907, lr_0 = 5.1649e-04
Loss = 1.2857e-02, PNorm = 135.5367, GNorm = 0.1832, lr_0 = 5.1614e-04
Loss = 1.1766e-02, PNorm = 135.5622, GNorm = 0.2241, lr_0 = 5.1579e-04
Loss = 1.1408e-02, PNorm = 135.5863, GNorm = 0.2407, lr_0 = 5.1543e-04
Loss = 1.1928e-02, PNorm = 135.6085, GNorm = 0.4303, lr_0 = 5.1508e-04
Loss = 1.2204e-02, PNorm = 135.6319, GNorm = 0.2814, lr_0 = 5.1473e-04
Loss = 1.1674e-02, PNorm = 135.6572, GNorm = 0.2756, lr_0 = 5.1437e-04
Loss = 1.1838e-02, PNorm = 135.6859, GNorm = 0.1838, lr_0 = 5.1402e-04
Loss = 1.1583e-02, PNorm = 135.7112, GNorm = 0.2576, lr_0 = 5.1367e-04
Loss = 1.1562e-02, PNorm = 135.7332, GNorm = 0.6174, lr_0 = 5.1332e-04
Loss = 1.2416e-02, PNorm = 135.7596, GNorm = 0.1335, lr_0 = 5.1297e-04
Loss = 1.4052e-02, PNorm = 135.7833, GNorm = 0.4904, lr_0 = 5.1262e-04
Loss = 1.2204e-02, PNorm = 135.8030, GNorm = 0.3199, lr_0 = 5.1226e-04
Loss = 1.5180e-02, PNorm = 135.8287, GNorm = 0.3473, lr_0 = 5.1191e-04
Loss = 1.2300e-02, PNorm = 135.8534, GNorm = 0.5425, lr_0 = 5.1156e-04
Loss = 1.0754e-02, PNorm = 135.8832, GNorm = 0.3667, lr_0 = 5.1121e-04
Loss = 1.1476e-02, PNorm = 135.9053, GNorm = 0.3992, lr_0 = 5.1086e-04
Loss = 1.0929e-02, PNorm = 135.9306, GNorm = 0.2824, lr_0 = 5.1051e-04
Loss = 1.2447e-02, PNorm = 135.9534, GNorm = 0.3427, lr_0 = 5.1016e-04
Loss = 1.2503e-02, PNorm = 135.9798, GNorm = 0.8563, lr_0 = 5.0981e-04
Loss = 1.2792e-02, PNorm = 136.0021, GNorm = 0.2040, lr_0 = 5.0946e-04
Loss = 1.1465e-02, PNorm = 136.0231, GNorm = 0.1011, lr_0 = 5.0911e-04
Loss = 1.1375e-02, PNorm = 136.0472, GNorm = 0.2656, lr_0 = 5.0877e-04
Loss = 1.2197e-02, PNorm = 136.0737, GNorm = 0.3933, lr_0 = 5.0842e-04
Loss = 1.0363e-02, PNorm = 136.0996, GNorm = 0.2956, lr_0 = 5.0807e-04
Loss = 1.1183e-02, PNorm = 136.1280, GNorm = 0.1877, lr_0 = 5.0772e-04
Loss = 9.5071e-03, PNorm = 136.1522, GNorm = 0.2316, lr_0 = 5.0737e-04
Loss = 1.3235e-02, PNorm = 136.1763, GNorm = 0.6427, lr_0 = 5.0703e-04
Loss = 1.1548e-02, PNorm = 136.2042, GNorm = 0.1412, lr_0 = 5.0668e-04
Loss = 1.1278e-02, PNorm = 136.2330, GNorm = 0.1335, lr_0 = 5.0633e-04
Loss = 1.1554e-02, PNorm = 136.2624, GNorm = 0.3674, lr_0 = 5.0598e-04
Loss = 1.1808e-02, PNorm = 136.2929, GNorm = 0.1070, lr_0 = 5.0564e-04
Loss = 1.3312e-02, PNorm = 136.3181, GNorm = 0.1745, lr_0 = 5.0529e-04
Loss = 1.0721e-02, PNorm = 136.3416, GNorm = 0.1718, lr_0 = 5.0494e-04
Loss = 1.1862e-02, PNorm = 136.3618, GNorm = 0.1835, lr_0 = 5.0460e-04
Loss = 1.1463e-02, PNorm = 136.3895, GNorm = 0.2869, lr_0 = 5.0425e-04
Loss = 1.0318e-02, PNorm = 136.4130, GNorm = 0.1789, lr_0 = 5.0391e-04
Loss = 1.0774e-02, PNorm = 136.4393, GNorm = 0.1352, lr_0 = 5.0356e-04
Loss = 1.4654e-02, PNorm = 136.4616, GNorm = 0.2542, lr_0 = 5.0322e-04
Loss = 1.1714e-02, PNorm = 136.4872, GNorm = 0.1773, lr_0 = 5.0287e-04
Loss = 1.0053e-02, PNorm = 136.5133, GNorm = 0.1841, lr_0 = 5.0253e-04
Loss = 1.3060e-02, PNorm = 136.5421, GNorm = 0.3065, lr_0 = 5.0218e-04
Loss = 1.3267e-02, PNorm = 136.5672, GNorm = 0.2094, lr_0 = 5.0184e-04
Loss = 1.3468e-02, PNorm = 136.5962, GNorm = 0.2664, lr_0 = 5.0150e-04
Loss = 1.3242e-02, PNorm = 136.6220, GNorm = 0.3513, lr_0 = 5.0115e-04
Loss = 1.3104e-02, PNorm = 136.6513, GNorm = 0.2366, lr_0 = 5.0081e-04
Loss = 1.0738e-02, PNorm = 136.6819, GNorm = 0.1448, lr_0 = 5.0047e-04
Loss = 9.9175e-03, PNorm = 136.7077, GNorm = 0.3057, lr_0 = 5.0012e-04
Loss = 1.3896e-02, PNorm = 136.7371, GNorm = 0.2185, lr_0 = 4.9978e-04
Loss = 1.1955e-02, PNorm = 136.7648, GNorm = 0.4592, lr_0 = 4.9944e-04
Loss = 1.1415e-02, PNorm = 136.7870, GNorm = 0.3369, lr_0 = 4.9910e-04
Loss = 1.3814e-02, PNorm = 136.8115, GNorm = 0.2972, lr_0 = 4.9875e-04
Loss = 1.0236e-02, PNorm = 136.8395, GNorm = 0.2469, lr_0 = 4.9841e-04
Loss = 1.0681e-02, PNorm = 136.8639, GNorm = 0.2116, lr_0 = 4.9807e-04
Loss = 1.2075e-02, PNorm = 136.8909, GNorm = 0.3418, lr_0 = 4.9773e-04
Loss = 1.1674e-02, PNorm = 136.9120, GNorm = 0.2783, lr_0 = 4.9739e-04
Loss = 9.7398e-03, PNorm = 136.9371, GNorm = 0.4080, lr_0 = 4.9705e-04
Loss = 1.3477e-02, PNorm = 136.9612, GNorm = 0.1898, lr_0 = 4.9671e-04
Loss = 1.0927e-02, PNorm = 136.9894, GNorm = 0.1126, lr_0 = 4.9637e-04
Loss = 1.2102e-02, PNorm = 137.0164, GNorm = 0.1973, lr_0 = 4.9603e-04
Loss = 1.1906e-02, PNorm = 137.0462, GNorm = 0.3075, lr_0 = 4.9569e-04
Loss = 1.2703e-02, PNorm = 137.0740, GNorm = 0.3060, lr_0 = 4.9535e-04
Loss = 1.1693e-02, PNorm = 137.1047, GNorm = 0.7566, lr_0 = 4.9501e-04
Loss = 9.9633e-03, PNorm = 137.1372, GNorm = 0.1122, lr_0 = 4.9467e-04
Loss = 1.0684e-02, PNorm = 137.1687, GNorm = 0.4572, lr_0 = 4.9433e-04
Loss = 1.1249e-02, PNorm = 137.1978, GNorm = 0.2056, lr_0 = 4.9399e-04
Loss = 1.4840e-02, PNorm = 137.2210, GNorm = 0.4536, lr_0 = 4.9365e-04
Loss = 1.3246e-02, PNorm = 137.2460, GNorm = 0.3631, lr_0 = 4.9332e-04
Loss = 1.3903e-02, PNorm = 137.2728, GNorm = 0.3953, lr_0 = 4.9298e-04
Loss = 1.4523e-02, PNorm = 137.3068, GNorm = 0.2354, lr_0 = 4.9264e-04
Loss = 9.4405e-03, PNorm = 137.3365, GNorm = 0.2584, lr_0 = 4.9230e-04
Loss = 1.1297e-02, PNorm = 137.3650, GNorm = 0.2566, lr_0 = 4.9197e-04
Loss = 1.0869e-02, PNorm = 137.3911, GNorm = 0.4423, lr_0 = 4.9163e-04
Loss = 1.2029e-02, PNorm = 137.4178, GNorm = 0.3570, lr_0 = 4.9129e-04
Loss = 1.2438e-02, PNorm = 137.4464, GNorm = 0.2041, lr_0 = 4.9095e-04
Loss = 1.1287e-02, PNorm = 137.4799, GNorm = 0.1513, lr_0 = 4.9062e-04
Loss = 1.3164e-02, PNorm = 137.5099, GNorm = 0.2457, lr_0 = 4.9028e-04
Loss = 1.1058e-02, PNorm = 137.5376, GNorm = 0.2894, lr_0 = 4.8995e-04
Loss = 1.3580e-02, PNorm = 137.5670, GNorm = 0.1916, lr_0 = 4.8961e-04
Loss = 1.2696e-02, PNorm = 137.5931, GNorm = 0.2355, lr_0 = 4.8928e-04
Loss = 1.2514e-02, PNorm = 137.6237, GNorm = 0.2825, lr_0 = 4.8894e-04
Loss = 1.0758e-02, PNorm = 137.6542, GNorm = 0.4290, lr_0 = 4.8861e-04
Loss = 1.4544e-02, PNorm = 137.6832, GNorm = 0.2076, lr_0 = 4.8827e-04
Loss = 1.2395e-02, PNorm = 137.7059, GNorm = 0.2901, lr_0 = 4.8794e-04
Loss = 1.1880e-02, PNorm = 137.7308, GNorm = 0.3183, lr_0 = 4.8760e-04
Loss = 1.3332e-02, PNorm = 137.7611, GNorm = 0.3816, lr_0 = 4.8727e-04
Loss = 1.2152e-02, PNorm = 137.7905, GNorm = 0.4686, lr_0 = 4.8693e-04
Loss = 1.2166e-02, PNorm = 137.8259, GNorm = 0.4093, lr_0 = 4.8660e-04
Loss = 1.0684e-02, PNorm = 137.8560, GNorm = 0.4499, lr_0 = 4.8627e-04
Loss = 1.6967e-02, PNorm = 137.8890, GNorm = 0.1852, lr_0 = 4.8593e-04
Loss = 1.1725e-02, PNorm = 137.9192, GNorm = 0.3095, lr_0 = 4.8560e-04
Loss = 1.4538e-02, PNorm = 137.9453, GNorm = 0.5826, lr_0 = 4.8527e-04
Loss = 1.0898e-02, PNorm = 137.9737, GNorm = 0.2575, lr_0 = 4.8494e-04
Loss = 1.3600e-02, PNorm = 138.0052, GNorm = 0.1798, lr_0 = 4.8460e-04
Loss = 1.1760e-02, PNorm = 138.0318, GNorm = 0.3022, lr_0 = 4.8427e-04
Loss = 9.7036e-03, PNorm = 138.0604, GNorm = 0.1586, lr_0 = 4.8394e-04
Loss = 1.3526e-02, PNorm = 138.0915, GNorm = 0.2574, lr_0 = 4.8361e-04
Loss = 1.2581e-02, PNorm = 138.1237, GNorm = 0.4796, lr_0 = 4.8328e-04
Loss = 1.3543e-02, PNorm = 138.1561, GNorm = 0.2403, lr_0 = 4.8295e-04
Loss = 1.1896e-02, PNorm = 138.1871, GNorm = 0.1628, lr_0 = 4.8262e-04
Loss = 1.0567e-02, PNorm = 138.2200, GNorm = 0.2061, lr_0 = 4.8228e-04
Loss = 1.1631e-02, PNorm = 138.2511, GNorm = 0.4077, lr_0 = 4.8195e-04
Loss = 1.1228e-02, PNorm = 138.2832, GNorm = 0.3469, lr_0 = 4.8162e-04
Loss = 1.4220e-02, PNorm = 138.3125, GNorm = 0.3955, lr_0 = 4.8129e-04
Loss = 1.4857e-02, PNorm = 138.3450, GNorm = 0.2611, lr_0 = 4.8096e-04
Loss = 1.3633e-02, PNorm = 138.3804, GNorm = 0.5652, lr_0 = 4.8064e-04
Loss = 1.2361e-02, PNorm = 138.4155, GNorm = 0.4653, lr_0 = 4.8031e-04
Loss = 1.4896e-02, PNorm = 138.4476, GNorm = 0.3566, lr_0 = 4.7998e-04
Loss = 1.2302e-02, PNorm = 138.4772, GNorm = 0.3191, lr_0 = 4.7965e-04
Loss = 1.0553e-02, PNorm = 138.5028, GNorm = 0.2789, lr_0 = 4.7932e-04
Loss = 1.3071e-02, PNorm = 138.5321, GNorm = 0.2753, lr_0 = 4.7899e-04
Loss = 1.4314e-02, PNorm = 138.5594, GNorm = 0.2093, lr_0 = 4.7866e-04
Loss = 1.1373e-02, PNorm = 138.5916, GNorm = 0.2071, lr_0 = 4.7833e-04
Loss = 1.1436e-02, PNorm = 138.6236, GNorm = 0.2165, lr_0 = 4.7801e-04
Loss = 1.1174e-02, PNorm = 138.6527, GNorm = 0.2580, lr_0 = 4.7768e-04
Loss = 1.2517e-02, PNorm = 138.6798, GNorm = 0.4126, lr_0 = 4.7735e-04
Loss = 1.0881e-02, PNorm = 138.7051, GNorm = 0.3827, lr_0 = 4.7703e-04
Validation mae = 0.481297
Epoch 11
Loss = 1.1241e-02, PNorm = 138.7241, GNorm = 0.1181, lr_0 = 4.7670e-04
Loss = 1.2522e-02, PNorm = 138.7437, GNorm = 0.1213, lr_0 = 4.7637e-04
Loss = 1.0373e-02, PNorm = 138.7615, GNorm = 0.2352, lr_0 = 4.7605e-04
Loss = 9.5528e-03, PNorm = 138.7840, GNorm = 0.2155, lr_0 = 4.7572e-04
Loss = 9.8371e-03, PNorm = 138.8017, GNorm = 0.5441, lr_0 = 4.7539e-04
Loss = 1.0534e-02, PNorm = 138.8180, GNorm = 0.3701, lr_0 = 4.7507e-04
Loss = 8.6863e-03, PNorm = 138.8394, GNorm = 0.2301, lr_0 = 4.7474e-04
Loss = 9.3037e-03, PNorm = 138.8617, GNorm = 0.3057, lr_0 = 4.7442e-04
Loss = 1.2021e-02, PNorm = 138.8834, GNorm = 0.1831, lr_0 = 4.7409e-04
Loss = 9.1893e-03, PNorm = 138.8991, GNorm = 0.3688, lr_0 = 4.7377e-04
Loss = 1.2485e-02, PNorm = 138.9195, GNorm = 0.5907, lr_0 = 4.7344e-04
Loss = 1.1342e-02, PNorm = 138.9405, GNorm = 0.1340, lr_0 = 4.7312e-04
Loss = 8.3104e-03, PNorm = 138.9563, GNorm = 0.4255, lr_0 = 4.7279e-04
Loss = 1.2978e-02, PNorm = 138.9740, GNorm = 0.4061, lr_0 = 4.7247e-04
Loss = 1.2240e-02, PNorm = 138.9975, GNorm = 0.4988, lr_0 = 4.7215e-04
Loss = 9.3629e-03, PNorm = 139.0166, GNorm = 0.4099, lr_0 = 4.7182e-04
Loss = 1.2448e-02, PNorm = 139.0426, GNorm = 0.5647, lr_0 = 4.7150e-04
Loss = 1.0070e-02, PNorm = 139.0655, GNorm = 0.2855, lr_0 = 4.7118e-04
Loss = 1.0395e-02, PNorm = 139.0850, GNorm = 0.3435, lr_0 = 4.7085e-04
Loss = 8.5873e-03, PNorm = 139.1039, GNorm = 0.3974, lr_0 = 4.7053e-04
Loss = 9.3695e-03, PNorm = 139.1229, GNorm = 0.1983, lr_0 = 4.7021e-04
Loss = 9.2001e-03, PNorm = 139.1483, GNorm = 0.1256, lr_0 = 4.6989e-04
Loss = 1.0140e-02, PNorm = 139.1715, GNorm = 0.2938, lr_0 = 4.6957e-04
Loss = 9.9571e-03, PNorm = 139.1967, GNorm = 0.2183, lr_0 = 4.6924e-04
Loss = 8.8411e-03, PNorm = 139.2152, GNorm = 0.4080, lr_0 = 4.6892e-04
Loss = 1.2368e-02, PNorm = 139.2329, GNorm = 0.3491, lr_0 = 4.6860e-04
Loss = 9.1334e-03, PNorm = 139.2535, GNorm = 0.2293, lr_0 = 4.6828e-04
Loss = 9.8307e-03, PNorm = 139.2754, GNorm = 0.4310, lr_0 = 4.6796e-04
Loss = 1.1469e-02, PNorm = 139.2959, GNorm = 0.1229, lr_0 = 4.6764e-04
Loss = 1.0248e-02, PNorm = 139.3195, GNorm = 0.3405, lr_0 = 4.6732e-04
Loss = 1.0790e-02, PNorm = 139.3433, GNorm = 0.1067, lr_0 = 4.6700e-04
Loss = 1.0035e-02, PNorm = 139.3614, GNorm = 0.1132, lr_0 = 4.6668e-04
Loss = 1.1595e-02, PNorm = 139.3811, GNorm = 0.3527, lr_0 = 4.6636e-04
Loss = 9.9363e-03, PNorm = 139.4034, GNorm = 0.2688, lr_0 = 4.6604e-04
Loss = 9.5127e-03, PNorm = 139.4255, GNorm = 0.3948, lr_0 = 4.6572e-04
Loss = 9.0760e-03, PNorm = 139.4470, GNorm = 0.2613, lr_0 = 4.6540e-04
Loss = 9.9988e-03, PNorm = 139.4703, GNorm = 0.2232, lr_0 = 4.6508e-04
Loss = 1.0189e-02, PNorm = 139.4993, GNorm = 0.4908, lr_0 = 4.6476e-04
Loss = 9.4476e-03, PNorm = 139.5225, GNorm = 0.3315, lr_0 = 4.6445e-04
Loss = 1.2291e-02, PNorm = 139.5458, GNorm = 0.4205, lr_0 = 4.6413e-04
Loss = 8.9193e-03, PNorm = 139.5680, GNorm = 0.1145, lr_0 = 4.6381e-04
Loss = 1.1885e-02, PNorm = 139.5900, GNorm = 0.3374, lr_0 = 4.6349e-04
Loss = 9.4161e-03, PNorm = 139.6074, GNorm = 0.1859, lr_0 = 4.6317e-04
Loss = 9.4000e-03, PNorm = 139.6255, GNorm = 0.2851, lr_0 = 4.6286e-04
Loss = 9.4720e-03, PNorm = 139.6438, GNorm = 0.1102, lr_0 = 4.6254e-04
Loss = 9.8566e-03, PNorm = 139.6671, GNorm = 0.1540, lr_0 = 4.6222e-04
Loss = 9.5704e-03, PNorm = 139.6873, GNorm = 0.6165, lr_0 = 4.6191e-04
Loss = 8.6257e-03, PNorm = 139.7095, GNorm = 0.5487, lr_0 = 4.6159e-04
Loss = 8.5984e-03, PNorm = 139.7333, GNorm = 0.0891, lr_0 = 4.6127e-04
Loss = 1.0178e-02, PNorm = 139.7547, GNorm = 0.4128, lr_0 = 4.6096e-04
Loss = 9.6671e-03, PNorm = 139.7766, GNorm = 0.5596, lr_0 = 4.6064e-04
Loss = 1.1910e-02, PNorm = 139.7968, GNorm = 0.1108, lr_0 = 4.6033e-04
Loss = 9.7708e-03, PNorm = 139.8211, GNorm = 0.3646, lr_0 = 4.6001e-04
Loss = 1.0181e-02, PNorm = 139.8443, GNorm = 0.5320, lr_0 = 4.5970e-04
Loss = 9.8606e-03, PNorm = 139.8658, GNorm = 0.1508, lr_0 = 4.5938e-04
Loss = 9.8008e-03, PNorm = 139.8893, GNorm = 0.1930, lr_0 = 4.5907e-04
Loss = 1.1664e-02, PNorm = 139.9152, GNorm = 0.5886, lr_0 = 4.5875e-04
Loss = 8.9385e-03, PNorm = 139.9370, GNorm = 0.4626, lr_0 = 4.5844e-04
Loss = 1.0937e-02, PNorm = 139.9633, GNorm = 0.2714, lr_0 = 4.5812e-04
Loss = 1.0302e-02, PNorm = 139.9890, GNorm = 0.2595, lr_0 = 4.5781e-04
Loss = 1.1150e-02, PNorm = 140.0156, GNorm = 0.2030, lr_0 = 4.5750e-04
Loss = 1.1183e-02, PNorm = 140.0425, GNorm = 0.1956, lr_0 = 4.5718e-04
Loss = 9.3102e-03, PNorm = 140.0678, GNorm = 0.3568, lr_0 = 4.5687e-04
Loss = 1.2078e-02, PNorm = 140.0940, GNorm = 0.4239, lr_0 = 4.5656e-04
Loss = 1.0160e-02, PNorm = 140.1184, GNorm = 0.4476, lr_0 = 4.5624e-04
Loss = 1.2287e-02, PNorm = 140.1438, GNorm = 0.5304, lr_0 = 4.5593e-04
Loss = 8.6531e-03, PNorm = 140.1727, GNorm = 0.1533, lr_0 = 4.5562e-04
Loss = 1.1140e-02, PNorm = 140.1975, GNorm = 0.2704, lr_0 = 4.5531e-04
Loss = 9.5045e-03, PNorm = 140.2195, GNorm = 0.2410, lr_0 = 4.5499e-04
Loss = 1.0032e-02, PNorm = 140.2407, GNorm = 0.1995, lr_0 = 4.5468e-04
Loss = 9.4099e-03, PNorm = 140.2671, GNorm = 0.4074, lr_0 = 4.5437e-04
Loss = 1.0855e-02, PNorm = 140.2908, GNorm = 0.0981, lr_0 = 4.5406e-04
Loss = 1.0338e-02, PNorm = 140.3143, GNorm = 0.1401, lr_0 = 4.5375e-04
Loss = 9.5376e-03, PNorm = 140.3396, GNorm = 0.5103, lr_0 = 4.5344e-04
Loss = 9.2597e-03, PNorm = 140.3633, GNorm = 0.1635, lr_0 = 4.5313e-04
Loss = 1.1880e-02, PNorm = 140.3878, GNorm = 0.3161, lr_0 = 4.5282e-04
Loss = 1.1643e-02, PNorm = 140.4125, GNorm = 0.6219, lr_0 = 4.5251e-04
Loss = 9.1856e-03, PNorm = 140.4387, GNorm = 0.3560, lr_0 = 4.5220e-04
Loss = 9.3081e-03, PNorm = 140.4688, GNorm = 0.4144, lr_0 = 4.5189e-04
Loss = 1.0343e-02, PNorm = 140.4934, GNorm = 0.3740, lr_0 = 4.5158e-04
Loss = 1.0246e-02, PNorm = 140.5223, GNorm = 0.1635, lr_0 = 4.5127e-04
Loss = 1.0983e-02, PNorm = 140.5461, GNorm = 0.2581, lr_0 = 4.5096e-04
Loss = 1.0082e-02, PNorm = 140.5710, GNorm = 0.8703, lr_0 = 4.5065e-04
Loss = 1.0254e-02, PNorm = 140.5966, GNorm = 0.2594, lr_0 = 4.5034e-04
Loss = 8.9749e-03, PNorm = 140.6180, GNorm = 0.1074, lr_0 = 4.5003e-04
Loss = 1.0780e-02, PNorm = 140.6404, GNorm = 0.2597, lr_0 = 4.4972e-04
Loss = 9.5517e-03, PNorm = 140.6652, GNorm = 0.2221, lr_0 = 4.4942e-04
Loss = 9.9835e-03, PNorm = 140.6860, GNorm = 0.2422, lr_0 = 4.4911e-04
Loss = 8.7834e-03, PNorm = 140.7112, GNorm = 0.1873, lr_0 = 4.4880e-04
Loss = 9.8038e-03, PNorm = 140.7385, GNorm = 0.2711, lr_0 = 4.4849e-04
Loss = 8.5674e-03, PNorm = 140.7650, GNorm = 0.2326, lr_0 = 4.4819e-04
Loss = 9.3389e-03, PNorm = 140.7890, GNorm = 0.2525, lr_0 = 4.4788e-04
Loss = 1.0746e-02, PNorm = 140.8130, GNorm = 0.2608, lr_0 = 4.4757e-04
Loss = 1.0702e-02, PNorm = 140.8324, GNorm = 0.1969, lr_0 = 4.4727e-04
Loss = 8.9270e-03, PNorm = 140.8538, GNorm = 0.1478, lr_0 = 4.4696e-04
Loss = 8.6789e-03, PNorm = 140.8753, GNorm = 0.1472, lr_0 = 4.4665e-04
Loss = 8.8176e-03, PNorm = 140.8984, GNorm = 0.2394, lr_0 = 4.4635e-04
Loss = 1.4861e-02, PNorm = 140.9201, GNorm = 0.3036, lr_0 = 4.4604e-04
Loss = 1.0787e-02, PNorm = 140.9458, GNorm = 0.2767, lr_0 = 4.4574e-04
Loss = 1.1839e-02, PNorm = 140.9720, GNorm = 0.3396, lr_0 = 4.4543e-04
Loss = 1.1070e-02, PNorm = 141.0027, GNorm = 0.0831, lr_0 = 4.4513e-04
Loss = 9.9876e-03, PNorm = 141.0282, GNorm = 0.2268, lr_0 = 4.4482e-04
Loss = 1.6206e-02, PNorm = 141.0546, GNorm = 0.3118, lr_0 = 4.4452e-04
Loss = 9.3097e-03, PNorm = 141.0800, GNorm = 0.0899, lr_0 = 4.4421e-04
Loss = 8.4361e-03, PNorm = 141.1051, GNorm = 0.2178, lr_0 = 4.4391e-04
Loss = 9.1625e-03, PNorm = 141.1318, GNorm = 0.1600, lr_0 = 4.4360e-04
Loss = 8.8538e-03, PNorm = 141.1588, GNorm = 0.1060, lr_0 = 4.4330e-04
Loss = 1.1834e-02, PNorm = 141.1812, GNorm = 0.2711, lr_0 = 4.4299e-04
Loss = 1.1484e-02, PNorm = 141.2062, GNorm = 0.1175, lr_0 = 4.4269e-04
Loss = 1.0494e-02, PNorm = 141.2354, GNorm = 0.2488, lr_0 = 4.4239e-04
Loss = 9.2732e-03, PNorm = 141.2625, GNorm = 0.2844, lr_0 = 4.4209e-04
Loss = 1.0155e-02, PNorm = 141.2893, GNorm = 0.3693, lr_0 = 4.4178e-04
Loss = 1.0817e-02, PNorm = 141.3146, GNorm = 0.3451, lr_0 = 4.4148e-04
Loss = 1.1355e-02, PNorm = 141.3438, GNorm = 0.4863, lr_0 = 4.4118e-04
Loss = 1.1056e-02, PNorm = 141.3709, GNorm = 0.5711, lr_0 = 4.4088e-04
Loss = 1.0582e-02, PNorm = 141.3901, GNorm = 0.5541, lr_0 = 4.4057e-04
Loss = 1.3289e-02, PNorm = 141.4113, GNorm = 0.2111, lr_0 = 4.4027e-04
Loss = 1.2107e-02, PNorm = 141.4390, GNorm = 0.1573, lr_0 = 4.3997e-04
Loss = 1.3863e-02, PNorm = 141.4643, GNorm = 0.3141, lr_0 = 4.3967e-04
Loss = 1.2176e-02, PNorm = 141.4902, GNorm = 0.2485, lr_0 = 4.3937e-04
Validation mae = 0.477924
Epoch 12
Loss = 9.5045e-03, PNorm = 141.5169, GNorm = 0.4450, lr_0 = 4.3907e-04
Loss = 8.8088e-03, PNorm = 141.5345, GNorm = 0.1847, lr_0 = 4.3877e-04
Loss = 1.1283e-02, PNorm = 141.5504, GNorm = 0.3546, lr_0 = 4.3846e-04
Loss = 8.6644e-03, PNorm = 141.5710, GNorm = 0.2320, lr_0 = 4.3816e-04
Loss = 9.7833e-03, PNorm = 141.5931, GNorm = 0.1827, lr_0 = 4.3786e-04
Loss = 8.8103e-03, PNorm = 141.6137, GNorm = 0.1924, lr_0 = 4.3756e-04
Loss = 1.0819e-02, PNorm = 141.6422, GNorm = 0.2061, lr_0 = 4.3726e-04
Loss = 7.8636e-03, PNorm = 141.6621, GNorm = 0.2154, lr_0 = 4.3696e-04
Loss = 9.6636e-03, PNorm = 141.6773, GNorm = 0.4091, lr_0 = 4.3667e-04
Loss = 8.2364e-03, PNorm = 141.6924, GNorm = 0.1657, lr_0 = 4.3637e-04
Loss = 8.3032e-03, PNorm = 141.7087, GNorm = 0.1666, lr_0 = 4.3607e-04
Loss = 9.1576e-03, PNorm = 141.7239, GNorm = 0.2509, lr_0 = 4.3577e-04
Loss = 7.9960e-03, PNorm = 141.7384, GNorm = 0.2095, lr_0 = 4.3547e-04
Loss = 1.0792e-02, PNorm = 141.7546, GNorm = 0.2216, lr_0 = 4.3517e-04
Loss = 9.0015e-03, PNorm = 141.7733, GNorm = 0.3961, lr_0 = 4.3487e-04
Loss = 8.6082e-03, PNorm = 141.7934, GNorm = 0.2718, lr_0 = 4.3458e-04
Loss = 8.8785e-03, PNorm = 141.8094, GNorm = 0.4708, lr_0 = 4.3428e-04
Loss = 7.3235e-03, PNorm = 141.8241, GNorm = 0.2286, lr_0 = 4.3398e-04
Loss = 8.6176e-03, PNorm = 141.8364, GNorm = 0.1538, lr_0 = 4.3368e-04
Loss = 8.1834e-03, PNorm = 141.8516, GNorm = 0.3192, lr_0 = 4.3339e-04
Loss = 9.9461e-03, PNorm = 141.8700, GNorm = 0.1856, lr_0 = 4.3309e-04
Loss = 8.8764e-03, PNorm = 141.8872, GNorm = 0.1319, lr_0 = 4.3279e-04
Loss = 7.0309e-03, PNorm = 141.9067, GNorm = 0.2074, lr_0 = 4.3250e-04
Loss = 8.5163e-03, PNorm = 141.9197, GNorm = 0.3222, lr_0 = 4.3220e-04
Loss = 8.4166e-03, PNorm = 141.9405, GNorm = 0.3101, lr_0 = 4.3190e-04
Loss = 7.8591e-03, PNorm = 141.9560, GNorm = 0.3707, lr_0 = 4.3161e-04
Loss = 8.2674e-03, PNorm = 141.9764, GNorm = 0.0835, lr_0 = 4.3131e-04
Loss = 9.4608e-03, PNorm = 141.9902, GNorm = 0.0889, lr_0 = 4.3102e-04
Loss = 8.8033e-03, PNorm = 142.0015, GNorm = 0.1979, lr_0 = 4.3072e-04
Loss = 8.5232e-03, PNorm = 142.0139, GNorm = 0.2897, lr_0 = 4.3043e-04
Loss = 7.8910e-03, PNorm = 142.0297, GNorm = 0.1308, lr_0 = 4.3013e-04
Loss = 8.9504e-03, PNorm = 142.0473, GNorm = 0.2318, lr_0 = 4.2984e-04
Loss = 8.4538e-03, PNorm = 142.0670, GNorm = 0.7477, lr_0 = 4.2954e-04
Loss = 9.6434e-03, PNorm = 142.0884, GNorm = 0.2472, lr_0 = 4.2925e-04
Loss = 7.4572e-03, PNorm = 142.1093, GNorm = 0.4192, lr_0 = 4.2895e-04
Loss = 8.9795e-03, PNorm = 142.1293, GNorm = 0.2449, lr_0 = 4.2866e-04
Loss = 7.6065e-03, PNorm = 142.1511, GNorm = 0.4201, lr_0 = 4.2837e-04
Loss = 9.2919e-03, PNorm = 142.1700, GNorm = 0.1734, lr_0 = 4.2807e-04
Loss = 8.4232e-03, PNorm = 142.1888, GNorm = 0.2270, lr_0 = 4.2778e-04
Loss = 8.4271e-03, PNorm = 142.2117, GNorm = 0.1678, lr_0 = 4.2749e-04
Loss = 7.2056e-03, PNorm = 142.2315, GNorm = 0.3507, lr_0 = 4.2719e-04
Loss = 7.1499e-03, PNorm = 142.2503, GNorm = 0.1314, lr_0 = 4.2690e-04
Loss = 7.4680e-03, PNorm = 142.2621, GNorm = 0.2555, lr_0 = 4.2661e-04
Loss = 8.2393e-03, PNorm = 142.2791, GNorm = 0.2831, lr_0 = 4.2632e-04
Loss = 7.4801e-03, PNorm = 142.2956, GNorm = 0.2656, lr_0 = 4.2602e-04
Loss = 8.0907e-03, PNorm = 142.3141, GNorm = 0.5093, lr_0 = 4.2573e-04
Loss = 1.0294e-02, PNorm = 142.3326, GNorm = 0.2233, lr_0 = 4.2544e-04
Loss = 8.9800e-03, PNorm = 142.3543, GNorm = 0.2238, lr_0 = 4.2515e-04
Loss = 9.3230e-03, PNorm = 142.3741, GNorm = 0.1248, lr_0 = 4.2486e-04
Loss = 8.8914e-03, PNorm = 142.3941, GNorm = 0.2976, lr_0 = 4.2457e-04
Loss = 8.1723e-03, PNorm = 142.4091, GNorm = 0.1662, lr_0 = 4.2428e-04
Loss = 7.6686e-03, PNorm = 142.4281, GNorm = 0.2822, lr_0 = 4.2399e-04
Loss = 8.9901e-03, PNorm = 142.4470, GNorm = 0.2236, lr_0 = 4.2370e-04
Loss = 7.5169e-03, PNorm = 142.4674, GNorm = 0.2434, lr_0 = 4.2340e-04
Loss = 7.9966e-03, PNorm = 142.4907, GNorm = 0.1113, lr_0 = 4.2311e-04
Loss = 9.2636e-03, PNorm = 142.5132, GNorm = 0.2712, lr_0 = 4.2283e-04
Loss = 7.2275e-03, PNorm = 142.5302, GNorm = 0.1317, lr_0 = 4.2254e-04
Loss = 7.8743e-03, PNorm = 142.5494, GNorm = 0.1473, lr_0 = 4.2225e-04
Loss = 7.7608e-03, PNorm = 142.5673, GNorm = 0.5254, lr_0 = 4.2196e-04
Loss = 7.7783e-03, PNorm = 142.5878, GNorm = 0.0883, lr_0 = 4.2167e-04
Loss = 8.3588e-03, PNorm = 142.6075, GNorm = 0.2333, lr_0 = 4.2138e-04
Loss = 8.3057e-03, PNorm = 142.6300, GNorm = 0.0953, lr_0 = 4.2109e-04
Loss = 6.8854e-03, PNorm = 142.6506, GNorm = 0.1944, lr_0 = 4.2080e-04
Loss = 8.9389e-03, PNorm = 142.6673, GNorm = 0.0915, lr_0 = 4.2051e-04
Loss = 6.9993e-03, PNorm = 142.6842, GNorm = 0.2896, lr_0 = 4.2023e-04
Loss = 8.8646e-03, PNorm = 142.6994, GNorm = 0.0780, lr_0 = 4.1994e-04
Loss = 8.8348e-03, PNorm = 142.7167, GNorm = 0.1193, lr_0 = 4.1965e-04
Loss = 8.0065e-03, PNorm = 142.7370, GNorm = 0.0944, lr_0 = 4.1936e-04
Loss = 7.6255e-03, PNorm = 142.7613, GNorm = 0.2620, lr_0 = 4.1907e-04
Loss = 7.1940e-03, PNorm = 142.7860, GNorm = 0.2819, lr_0 = 4.1879e-04
Loss = 7.6560e-03, PNorm = 142.8074, GNorm = 0.1190, lr_0 = 4.1850e-04
Loss = 1.2305e-02, PNorm = 142.8263, GNorm = 0.2773, lr_0 = 4.1821e-04
Loss = 1.0061e-02, PNorm = 142.8440, GNorm = 0.3535, lr_0 = 4.1793e-04
Loss = 7.9586e-03, PNorm = 142.8629, GNorm = 0.2812, lr_0 = 4.1764e-04
Loss = 8.1766e-03, PNorm = 142.8850, GNorm = 0.0967, lr_0 = 4.1736e-04
Loss = 9.6455e-03, PNorm = 142.9056, GNorm = 0.6739, lr_0 = 4.1707e-04
Loss = 9.8816e-03, PNorm = 142.9219, GNorm = 0.3707, lr_0 = 4.1678e-04
Loss = 9.6010e-03, PNorm = 142.9455, GNorm = 0.2677, lr_0 = 4.1650e-04
Loss = 7.5182e-03, PNorm = 142.9667, GNorm = 0.2344, lr_0 = 4.1621e-04
Loss = 8.7603e-03, PNorm = 142.9856, GNorm = 0.1953, lr_0 = 4.1593e-04
Loss = 1.0859e-02, PNorm = 143.0011, GNorm = 0.5145, lr_0 = 4.1564e-04
Loss = 8.3251e-03, PNorm = 143.0182, GNorm = 0.3218, lr_0 = 4.1536e-04
Loss = 7.7127e-03, PNorm = 143.0386, GNorm = 0.3163, lr_0 = 4.1507e-04
Loss = 8.6354e-03, PNorm = 143.0590, GNorm = 0.1283, lr_0 = 4.1479e-04
Loss = 7.1590e-03, PNorm = 143.0839, GNorm = 0.2019, lr_0 = 4.1450e-04
Loss = 7.9436e-03, PNorm = 143.1044, GNorm = 0.1607, lr_0 = 4.1422e-04
Loss = 8.4083e-03, PNorm = 143.1193, GNorm = 0.2229, lr_0 = 4.1394e-04
Loss = 8.6379e-03, PNorm = 143.1419, GNorm = 0.4365, lr_0 = 4.1365e-04
Loss = 8.3462e-03, PNorm = 143.1648, GNorm = 0.3773, lr_0 = 4.1337e-04
Loss = 1.3666e-02, PNorm = 143.1850, GNorm = 0.4225, lr_0 = 4.1309e-04
Loss = 8.1043e-03, PNorm = 143.2116, GNorm = 0.3992, lr_0 = 4.1280e-04
Loss = 1.2331e-02, PNorm = 143.2375, GNorm = 0.1875, lr_0 = 4.1252e-04
Loss = 9.4223e-03, PNorm = 143.2600, GNorm = 0.5600, lr_0 = 4.1224e-04
Loss = 9.2795e-03, PNorm = 143.2815, GNorm = 0.1869, lr_0 = 4.1196e-04
Loss = 8.4525e-03, PNorm = 143.3061, GNorm = 0.3785, lr_0 = 4.1167e-04
Loss = 7.5415e-03, PNorm = 143.3238, GNorm = 0.1404, lr_0 = 4.1139e-04
Loss = 8.6802e-03, PNorm = 143.3428, GNorm = 0.1956, lr_0 = 4.1111e-04
Loss = 9.0837e-03, PNorm = 143.3611, GNorm = 0.6058, lr_0 = 4.1083e-04
Loss = 8.7210e-03, PNorm = 143.3803, GNorm = 0.2395, lr_0 = 4.1055e-04
Loss = 7.8719e-03, PNorm = 143.4004, GNorm = 0.3242, lr_0 = 4.1027e-04
Loss = 9.0375e-03, PNorm = 143.4212, GNorm = 0.5487, lr_0 = 4.0998e-04
Loss = 1.0702e-02, PNorm = 143.4452, GNorm = 0.4550, lr_0 = 4.0970e-04
Loss = 1.3837e-02, PNorm = 143.4663, GNorm = 0.5329, lr_0 = 4.0942e-04
Loss = 1.0028e-02, PNorm = 143.4917, GNorm = 0.1351, lr_0 = 4.0914e-04
Loss = 1.0830e-02, PNorm = 143.5213, GNorm = 0.3506, lr_0 = 4.0886e-04
Loss = 1.0327e-02, PNorm = 143.5402, GNorm = 0.3779, lr_0 = 4.0858e-04
Loss = 8.6734e-03, PNorm = 143.5602, GNorm = 0.1391, lr_0 = 4.0830e-04
Loss = 9.7446e-03, PNorm = 143.5842, GNorm = 0.3455, lr_0 = 4.0802e-04
Loss = 8.9936e-03, PNorm = 143.6087, GNorm = 0.2084, lr_0 = 4.0774e-04
Loss = 7.8108e-03, PNorm = 143.6266, GNorm = 0.3911, lr_0 = 4.0746e-04
Loss = 8.3289e-03, PNorm = 143.6443, GNorm = 0.1488, lr_0 = 4.0718e-04
Loss = 8.3739e-03, PNorm = 143.6641, GNorm = 0.5366, lr_0 = 4.0691e-04
Loss = 7.7995e-03, PNorm = 143.6830, GNorm = 0.3365, lr_0 = 4.0663e-04
Loss = 1.3109e-02, PNorm = 143.7038, GNorm = 0.1361, lr_0 = 4.0635e-04
Loss = 8.4735e-03, PNorm = 143.7250, GNorm = 0.1821, lr_0 = 4.0607e-04
Loss = 6.8352e-03, PNorm = 143.7490, GNorm = 0.2555, lr_0 = 4.0579e-04
Loss = 8.6930e-03, PNorm = 143.7732, GNorm = 0.2364, lr_0 = 4.0551e-04
Loss = 8.6850e-03, PNorm = 143.7939, GNorm = 0.4349, lr_0 = 4.0524e-04
Loss = 8.4951e-03, PNorm = 143.8085, GNorm = 0.4793, lr_0 = 4.0496e-04
Loss = 8.8111e-03, PNorm = 143.8256, GNorm = 0.1848, lr_0 = 4.0468e-04
Validation mae = 0.477318
Epoch 13
Loss = 1.0298e-02, PNorm = 143.8427, GNorm = 0.3063, lr_0 = 4.0440e-04
Loss = 7.0921e-03, PNorm = 143.8629, GNorm = 0.1595, lr_0 = 4.0413e-04
Loss = 6.2718e-03, PNorm = 143.8785, GNorm = 0.2936, lr_0 = 4.0385e-04
Loss = 9.5103e-03, PNorm = 143.8919, GNorm = 0.3320, lr_0 = 4.0357e-04
Loss = 6.7978e-03, PNorm = 143.9058, GNorm = 0.1655, lr_0 = 4.0330e-04
Loss = 7.7328e-03, PNorm = 143.9220, GNorm = 0.1504, lr_0 = 4.0302e-04
Loss = 6.1033e-03, PNorm = 143.9309, GNorm = 0.1554, lr_0 = 4.0274e-04
Loss = 7.5210e-03, PNorm = 143.9464, GNorm = 0.2282, lr_0 = 4.0247e-04
Loss = 7.9511e-03, PNorm = 143.9616, GNorm = 0.4827, lr_0 = 4.0219e-04
Loss = 6.6435e-03, PNorm = 143.9762, GNorm = 0.1060, lr_0 = 4.0192e-04
Loss = 7.2819e-03, PNorm = 143.9936, GNorm = 0.2148, lr_0 = 4.0164e-04
Loss = 7.2751e-03, PNorm = 144.0077, GNorm = 0.1872, lr_0 = 4.0137e-04
Loss = 7.1492e-03, PNorm = 144.0214, GNorm = 0.1783, lr_0 = 4.0109e-04
Loss = 7.4580e-03, PNorm = 144.0379, GNorm = 0.2506, lr_0 = 4.0082e-04
Loss = 5.9962e-03, PNorm = 144.0584, GNorm = 0.1436, lr_0 = 4.0054e-04
Loss = 7.0213e-03, PNorm = 144.0708, GNorm = 0.0833, lr_0 = 4.0027e-04
Loss = 7.8557e-03, PNorm = 144.0803, GNorm = 0.1356, lr_0 = 3.9999e-04
Loss = 5.9529e-03, PNorm = 144.0974, GNorm = 0.1434, lr_0 = 3.9972e-04
Loss = 5.7333e-03, PNorm = 144.1121, GNorm = 0.0923, lr_0 = 3.9945e-04
Loss = 6.6965e-03, PNorm = 144.1215, GNorm = 0.0846, lr_0 = 3.9917e-04
Loss = 6.5929e-03, PNorm = 144.1318, GNorm = 0.1018, lr_0 = 3.9890e-04
Loss = 9.5960e-03, PNorm = 144.1406, GNorm = 0.1119, lr_0 = 3.9863e-04
Loss = 8.9924e-03, PNorm = 144.1566, GNorm = 0.3605, lr_0 = 3.9835e-04
Loss = 7.5528e-03, PNorm = 144.1687, GNorm = 0.1160, lr_0 = 3.9808e-04
Loss = 7.2021e-03, PNorm = 144.1839, GNorm = 0.1052, lr_0 = 3.9781e-04
Loss = 8.1104e-03, PNorm = 144.1940, GNorm = 0.0935, lr_0 = 3.9753e-04
Loss = 7.1458e-03, PNorm = 144.2136, GNorm = 0.1150, lr_0 = 3.9726e-04
Loss = 6.7834e-03, PNorm = 144.2317, GNorm = 0.3774, lr_0 = 3.9699e-04
Loss = 7.9281e-03, PNorm = 144.2509, GNorm = 0.6417, lr_0 = 3.9672e-04
Loss = 7.2320e-03, PNorm = 144.2688, GNorm = 0.2410, lr_0 = 3.9645e-04
Loss = 7.9291e-03, PNorm = 144.2885, GNorm = 0.1810, lr_0 = 3.9617e-04
Loss = 6.2718e-03, PNorm = 144.3042, GNorm = 0.1308, lr_0 = 3.9590e-04
Loss = 8.0512e-03, PNorm = 144.3189, GNorm = 0.3491, lr_0 = 3.9563e-04
Loss = 6.8760e-03, PNorm = 144.3327, GNorm = 0.2488, lr_0 = 3.9536e-04
Loss = 6.8760e-03, PNorm = 144.3499, GNorm = 0.1446, lr_0 = 3.9509e-04
Loss = 7.7789e-03, PNorm = 144.3698, GNorm = 0.4052, lr_0 = 3.9482e-04
Loss = 7.1691e-03, PNorm = 144.3855, GNorm = 0.3768, lr_0 = 3.9455e-04
Loss = 6.9396e-03, PNorm = 144.4064, GNorm = 0.2997, lr_0 = 3.9428e-04
Loss = 6.8858e-03, PNorm = 144.4256, GNorm = 0.1109, lr_0 = 3.9401e-04
Loss = 1.0468e-02, PNorm = 144.4414, GNorm = 0.1263, lr_0 = 3.9374e-04
Loss = 6.1504e-03, PNorm = 144.4562, GNorm = 0.1150, lr_0 = 3.9347e-04
Loss = 9.8939e-03, PNorm = 144.4719, GNorm = 0.3447, lr_0 = 3.9320e-04
Loss = 7.1577e-03, PNorm = 144.4888, GNorm = 0.2920, lr_0 = 3.9293e-04
Loss = 6.6485e-03, PNorm = 144.5086, GNorm = 0.1690, lr_0 = 3.9266e-04
Loss = 6.0435e-03, PNorm = 144.5219, GNorm = 0.4186, lr_0 = 3.9239e-04
Loss = 5.7014e-03, PNorm = 144.5373, GNorm = 0.4394, lr_0 = 3.9212e-04
Loss = 6.4291e-03, PNorm = 144.5530, GNorm = 0.3224, lr_0 = 3.9185e-04
Loss = 5.9087e-03, PNorm = 144.5634, GNorm = 0.1430, lr_0 = 3.9159e-04
Loss = 6.1197e-03, PNorm = 144.5770, GNorm = 0.0955, lr_0 = 3.9132e-04
Loss = 7.2516e-03, PNorm = 144.5929, GNorm = 0.1756, lr_0 = 3.9105e-04
Loss = 8.4952e-03, PNorm = 144.6135, GNorm = 0.2447, lr_0 = 3.9078e-04
Loss = 6.2013e-03, PNorm = 144.6341, GNorm = 0.1190, lr_0 = 3.9051e-04
Loss = 5.6951e-03, PNorm = 144.6544, GNorm = 0.1149, lr_0 = 3.9025e-04
Loss = 7.3403e-03, PNorm = 144.6726, GNorm = 0.2074, lr_0 = 3.8998e-04
Loss = 7.4517e-03, PNorm = 144.6925, GNorm = 0.1422, lr_0 = 3.8971e-04
Loss = 7.2203e-03, PNorm = 144.7064, GNorm = 0.1280, lr_0 = 3.8945e-04
Loss = 8.1426e-03, PNorm = 144.7165, GNorm = 0.1573, lr_0 = 3.8918e-04
Loss = 7.8117e-03, PNorm = 144.7311, GNorm = 0.1544, lr_0 = 3.8891e-04
Loss = 6.5562e-03, PNorm = 144.7445, GNorm = 0.2908, lr_0 = 3.8865e-04
Loss = 6.8326e-03, PNorm = 144.7582, GNorm = 0.3906, lr_0 = 3.8838e-04
Loss = 6.9030e-03, PNorm = 144.7732, GNorm = 0.1161, lr_0 = 3.8811e-04
Loss = 6.9763e-03, PNorm = 144.7891, GNorm = 0.2317, lr_0 = 3.8785e-04
Loss = 7.0683e-03, PNorm = 144.8029, GNorm = 0.1768, lr_0 = 3.8758e-04
Loss = 7.0645e-03, PNorm = 144.8179, GNorm = 0.3452, lr_0 = 3.8732e-04
Loss = 6.8014e-03, PNorm = 144.8377, GNorm = 0.1487, lr_0 = 3.8705e-04
Loss = 8.6130e-03, PNorm = 144.8584, GNorm = 0.2485, lr_0 = 3.8679e-04
Loss = 6.3617e-03, PNorm = 144.8755, GNorm = 0.1646, lr_0 = 3.8652e-04
Loss = 1.0075e-02, PNorm = 144.8933, GNorm = 0.2990, lr_0 = 3.8626e-04
Loss = 8.6764e-03, PNorm = 144.9142, GNorm = 0.5748, lr_0 = 3.8599e-04
Loss = 7.4990e-03, PNorm = 144.9297, GNorm = 0.1856, lr_0 = 3.8573e-04
Loss = 6.4480e-03, PNorm = 144.9467, GNorm = 0.2362, lr_0 = 3.8546e-04
Loss = 8.4415e-03, PNorm = 144.9601, GNorm = 0.2078, lr_0 = 3.8520e-04
Loss = 6.9373e-03, PNorm = 144.9749, GNorm = 0.2816, lr_0 = 3.8493e-04
Loss = 8.3490e-03, PNorm = 144.9864, GNorm = 0.1905, lr_0 = 3.8467e-04
Loss = 7.0572e-03, PNorm = 145.0000, GNorm = 0.2741, lr_0 = 3.8441e-04
Loss = 7.7931e-03, PNorm = 145.0193, GNorm = 0.1689, lr_0 = 3.8414e-04
Loss = 5.7711e-03, PNorm = 145.0413, GNorm = 0.1315, lr_0 = 3.8388e-04
Loss = 7.3839e-03, PNorm = 145.0574, GNorm = 0.1611, lr_0 = 3.8362e-04
Loss = 6.7065e-03, PNorm = 145.0738, GNorm = 0.2085, lr_0 = 3.8336e-04
Loss = 7.0670e-03, PNorm = 145.0922, GNorm = 0.3096, lr_0 = 3.8309e-04
Loss = 7.4885e-03, PNorm = 145.1047, GNorm = 0.3001, lr_0 = 3.8283e-04
Loss = 9.0123e-03, PNorm = 145.1242, GNorm = 0.1606, lr_0 = 3.8257e-04
Loss = 7.2321e-03, PNorm = 145.1459, GNorm = 0.2235, lr_0 = 3.8231e-04
Loss = 7.5630e-03, PNorm = 145.1594, GNorm = 0.1828, lr_0 = 3.8204e-04
Loss = 8.4765e-03, PNorm = 145.1725, GNorm = 0.2790, lr_0 = 3.8178e-04
Loss = 6.8322e-03, PNorm = 145.1899, GNorm = 0.1367, lr_0 = 3.8152e-04
Loss = 6.5644e-03, PNorm = 145.2076, GNorm = 0.2738, lr_0 = 3.8126e-04
Loss = 7.0197e-03, PNorm = 145.2264, GNorm = 0.1312, lr_0 = 3.8100e-04
Loss = 9.6672e-03, PNorm = 145.2458, GNorm = 0.4461, lr_0 = 3.8074e-04
Loss = 8.1057e-03, PNorm = 145.2631, GNorm = 0.3298, lr_0 = 3.8048e-04
Loss = 6.4125e-03, PNorm = 145.2801, GNorm = 0.3258, lr_0 = 3.8022e-04
Loss = 6.4804e-03, PNorm = 145.2989, GNorm = 0.1241, lr_0 = 3.7995e-04
Loss = 6.3462e-03, PNorm = 145.3211, GNorm = 0.3346, lr_0 = 3.7969e-04
Loss = 6.1512e-03, PNorm = 145.3400, GNorm = 0.1945, lr_0 = 3.7943e-04
Loss = 7.2851e-03, PNorm = 145.3539, GNorm = 0.0940, lr_0 = 3.7917e-04
Loss = 8.2215e-03, PNorm = 145.3697, GNorm = 0.4944, lr_0 = 3.7891e-04
Loss = 6.3054e-03, PNorm = 145.3888, GNorm = 0.1066, lr_0 = 3.7866e-04
Loss = 7.9844e-03, PNorm = 145.4049, GNorm = 0.3472, lr_0 = 3.7840e-04
Loss = 5.6652e-03, PNorm = 145.4189, GNorm = 0.3512, lr_0 = 3.7814e-04
Loss = 6.3062e-03, PNorm = 145.4331, GNorm = 0.1922, lr_0 = 3.7788e-04
Loss = 8.7178e-03, PNorm = 145.4453, GNorm = 0.1401, lr_0 = 3.7762e-04
Loss = 6.0491e-03, PNorm = 145.4614, GNorm = 0.2229, lr_0 = 3.7736e-04
Loss = 7.1449e-03, PNorm = 145.4748, GNorm = 0.1437, lr_0 = 3.7710e-04
Loss = 6.2230e-03, PNorm = 145.4914, GNorm = 0.3522, lr_0 = 3.7684e-04
Loss = 8.0341e-03, PNorm = 145.5048, GNorm = 0.2222, lr_0 = 3.7659e-04
Loss = 8.6230e-03, PNorm = 145.5191, GNorm = 0.4363, lr_0 = 3.7633e-04
Loss = 8.3686e-03, PNorm = 145.5381, GNorm = 0.4864, lr_0 = 3.7607e-04
Loss = 9.0918e-03, PNorm = 145.5610, GNorm = 0.3308, lr_0 = 3.7581e-04
Loss = 6.9255e-03, PNorm = 145.5810, GNorm = 0.7036, lr_0 = 3.7555e-04
Loss = 7.0685e-03, PNorm = 145.6012, GNorm = 0.3988, lr_0 = 3.7530e-04
Loss = 6.6794e-03, PNorm = 145.6177, GNorm = 0.0944, lr_0 = 3.7504e-04
Loss = 6.6483e-03, PNorm = 145.6371, GNorm = 0.2950, lr_0 = 3.7478e-04
Loss = 9.0920e-03, PNorm = 145.6592, GNorm = 0.3250, lr_0 = 3.7453e-04
Loss = 8.6390e-03, PNorm = 145.6744, GNorm = 0.1630, lr_0 = 3.7427e-04
Loss = 8.5926e-03, PNorm = 145.6925, GNorm = 0.5115, lr_0 = 3.7401e-04
Loss = 7.3355e-03, PNorm = 145.7107, GNorm = 0.1911, lr_0 = 3.7376e-04
Loss = 7.2201e-03, PNorm = 145.7262, GNorm = 0.6371, lr_0 = 3.7350e-04
Loss = 1.1370e-02, PNorm = 145.7414, GNorm = 0.5006, lr_0 = 3.7325e-04
Loss = 6.9696e-03, PNorm = 145.7626, GNorm = 0.1113, lr_0 = 3.7299e-04
Loss = 7.3058e-03, PNorm = 145.7825, GNorm = 0.3213, lr_0 = 3.7273e-04
Validation mae = 0.478693
Epoch 14
Loss = 5.7547e-03, PNorm = 145.7959, GNorm = 0.4819, lr_0 = 3.7248e-04
Loss = 5.7660e-03, PNorm = 145.8046, GNorm = 0.1932, lr_0 = 3.7222e-04
Loss = 6.3824e-03, PNorm = 145.8128, GNorm = 0.2473, lr_0 = 3.7197e-04
Loss = 6.0743e-03, PNorm = 145.8242, GNorm = 0.2585, lr_0 = 3.7171e-04
Loss = 6.9497e-03, PNorm = 145.8421, GNorm = 0.2155, lr_0 = 3.7146e-04
Loss = 6.0223e-03, PNorm = 145.8545, GNorm = 0.2008, lr_0 = 3.7120e-04
Loss = 7.7108e-03, PNorm = 145.8673, GNorm = 0.2578, lr_0 = 3.7095e-04
Loss = 6.7819e-03, PNorm = 145.8764, GNorm = 0.3119, lr_0 = 3.7070e-04
Loss = 9.6049e-03, PNorm = 145.8911, GNorm = 0.2412, lr_0 = 3.7044e-04
Loss = 6.1139e-03, PNorm = 145.9050, GNorm = 0.1782, lr_0 = 3.7019e-04
Loss = 9.4446e-03, PNorm = 145.9165, GNorm = 0.1330, lr_0 = 3.6993e-04
Loss = 6.5420e-03, PNorm = 145.9279, GNorm = 0.1133, lr_0 = 3.6968e-04
Loss = 5.9007e-03, PNorm = 145.9415, GNorm = 0.3085, lr_0 = 3.6943e-04
Loss = 5.9060e-03, PNorm = 145.9555, GNorm = 0.0632, lr_0 = 3.6917e-04
Loss = 6.1174e-03, PNorm = 145.9666, GNorm = 0.1965, lr_0 = 3.6892e-04
Loss = 6.5626e-03, PNorm = 145.9772, GNorm = 0.2448, lr_0 = 3.6867e-04
Loss = 4.8973e-03, PNorm = 145.9869, GNorm = 0.1648, lr_0 = 3.6842e-04
Loss = 6.1767e-03, PNorm = 146.0009, GNorm = 0.1694, lr_0 = 3.6816e-04
Loss = 5.9493e-03, PNorm = 146.0149, GNorm = 0.4201, lr_0 = 3.6791e-04
Loss = 6.1237e-03, PNorm = 146.0275, GNorm = 0.4787, lr_0 = 3.6766e-04
Loss = 6.8912e-03, PNorm = 146.0374, GNorm = 0.1567, lr_0 = 3.6741e-04
Loss = 5.9303e-03, PNorm = 146.0499, GNorm = 0.2249, lr_0 = 3.6716e-04
Loss = 6.2889e-03, PNorm = 146.0673, GNorm = 0.2313, lr_0 = 3.6690e-04
Loss = 6.8639e-03, PNorm = 146.0774, GNorm = 0.1477, lr_0 = 3.6665e-04
Loss = 6.3311e-03, PNorm = 146.0889, GNorm = 0.3846, lr_0 = 3.6640e-04
Loss = 6.0980e-03, PNorm = 146.1009, GNorm = 0.2325, lr_0 = 3.6615e-04
Loss = 5.8325e-03, PNorm = 146.1140, GNorm = 0.1859, lr_0 = 3.6590e-04
Loss = 5.6240e-03, PNorm = 146.1284, GNorm = 0.3973, lr_0 = 3.6565e-04
Loss = 6.1109e-03, PNorm = 146.1416, GNorm = 0.3236, lr_0 = 3.6540e-04
Loss = 6.0628e-03, PNorm = 146.1534, GNorm = 0.1066, lr_0 = 3.6515e-04
Loss = 5.9850e-03, PNorm = 146.1623, GNorm = 0.2846, lr_0 = 3.6490e-04
Loss = 5.4233e-03, PNorm = 146.1719, GNorm = 0.1998, lr_0 = 3.6465e-04
Loss = 7.4086e-03, PNorm = 146.1844, GNorm = 0.3788, lr_0 = 3.6440e-04
Loss = 6.0275e-03, PNorm = 146.2018, GNorm = 0.2207, lr_0 = 3.6415e-04
Loss = 5.9089e-03, PNorm = 146.2154, GNorm = 0.0970, lr_0 = 3.6390e-04
Loss = 5.9244e-03, PNorm = 146.2319, GNorm = 0.2188, lr_0 = 3.6365e-04
Loss = 5.6763e-03, PNorm = 146.2472, GNorm = 0.0831, lr_0 = 3.6340e-04
Loss = 5.9974e-03, PNorm = 146.2600, GNorm = 0.1390, lr_0 = 3.6315e-04
Loss = 6.5692e-03, PNorm = 146.2736, GNorm = 0.1418, lr_0 = 3.6290e-04
Loss = 7.7685e-03, PNorm = 146.2891, GNorm = 0.2099, lr_0 = 3.6266e-04
Loss = 8.5995e-03, PNorm = 146.3061, GNorm = 0.1845, lr_0 = 3.6241e-04
Loss = 6.5958e-03, PNorm = 146.3208, GNorm = 0.1263, lr_0 = 3.6216e-04
Loss = 7.0759e-03, PNorm = 146.3367, GNorm = 0.2162, lr_0 = 3.6191e-04
Loss = 8.0879e-03, PNorm = 146.3511, GNorm = 0.1195, lr_0 = 3.6166e-04
Loss = 8.4776e-03, PNorm = 146.3645, GNorm = 0.2163, lr_0 = 3.6141e-04
Loss = 6.3474e-03, PNorm = 146.3834, GNorm = 0.1009, lr_0 = 3.6117e-04
Loss = 6.4817e-03, PNorm = 146.3973, GNorm = 0.1667, lr_0 = 3.6092e-04
Loss = 8.7810e-03, PNorm = 146.4097, GNorm = 0.7101, lr_0 = 3.6067e-04
Loss = 6.7769e-03, PNorm = 146.4235, GNorm = 0.0751, lr_0 = 3.6043e-04
Loss = 6.4657e-03, PNorm = 146.4336, GNorm = 0.2906, lr_0 = 3.6018e-04
Loss = 5.6214e-03, PNorm = 146.4461, GNorm = 0.3876, lr_0 = 3.5993e-04
Loss = 5.1979e-03, PNorm = 146.4586, GNorm = 0.1929, lr_0 = 3.5969e-04
Loss = 6.5470e-03, PNorm = 146.4735, GNorm = 0.4174, lr_0 = 3.5944e-04
Loss = 5.9206e-03, PNorm = 146.4876, GNorm = 0.0896, lr_0 = 3.5919e-04
Loss = 5.1034e-03, PNorm = 146.5059, GNorm = 0.4308, lr_0 = 3.5895e-04
Loss = 5.7934e-03, PNorm = 146.5197, GNorm = 0.2303, lr_0 = 3.5870e-04
Loss = 5.0631e-03, PNorm = 146.5307, GNorm = 0.0917, lr_0 = 3.5845e-04
Loss = 5.3479e-03, PNorm = 146.5411, GNorm = 0.1012, lr_0 = 3.5821e-04
Loss = 5.2886e-03, PNorm = 146.5535, GNorm = 0.1370, lr_0 = 3.5796e-04
Loss = 5.3387e-03, PNorm = 146.5626, GNorm = 0.5875, lr_0 = 3.5772e-04
Loss = 6.2037e-03, PNorm = 146.5724, GNorm = 0.2417, lr_0 = 3.5747e-04
Loss = 5.7793e-03, PNorm = 146.5878, GNorm = 0.2091, lr_0 = 3.5723e-04
Loss = 6.0235e-03, PNorm = 146.6041, GNorm = 0.2647, lr_0 = 3.5698e-04
Loss = 5.5762e-03, PNorm = 146.6193, GNorm = 0.1893, lr_0 = 3.5674e-04
Loss = 9.4410e-03, PNorm = 146.6388, GNorm = 0.3447, lr_0 = 3.5650e-04
Loss = 6.1549e-03, PNorm = 146.6536, GNorm = 0.1091, lr_0 = 3.5625e-04
Loss = 4.8050e-03, PNorm = 146.6656, GNorm = 0.0897, lr_0 = 3.5601e-04
Loss = 5.4209e-03, PNorm = 146.6764, GNorm = 0.1988, lr_0 = 3.5576e-04
Loss = 7.3925e-03, PNorm = 146.6893, GNorm = 0.1637, lr_0 = 3.5552e-04
Loss = 4.8929e-03, PNorm = 146.7048, GNorm = 0.0983, lr_0 = 3.5528e-04
Loss = 6.0393e-03, PNorm = 146.7172, GNorm = 0.1527, lr_0 = 3.5503e-04
Loss = 4.5242e-03, PNorm = 146.7271, GNorm = 0.1648, lr_0 = 3.5479e-04
Loss = 5.8244e-03, PNorm = 146.7363, GNorm = 0.1690, lr_0 = 3.5455e-04
Loss = 5.9023e-03, PNorm = 146.7456, GNorm = 0.2676, lr_0 = 3.5430e-04
Loss = 5.4145e-03, PNorm = 146.7568, GNorm = 0.1173, lr_0 = 3.5406e-04
Loss = 5.0321e-03, PNorm = 146.7703, GNorm = 0.1601, lr_0 = 3.5382e-04
Loss = 6.4493e-03, PNorm = 146.7810, GNorm = 0.1162, lr_0 = 3.5358e-04
Loss = 5.9365e-03, PNorm = 146.7925, GNorm = 0.1183, lr_0 = 3.5333e-04
Loss = 5.8305e-03, PNorm = 146.8094, GNorm = 0.2490, lr_0 = 3.5309e-04
Loss = 6.2527e-03, PNorm = 146.8212, GNorm = 0.3455, lr_0 = 3.5285e-04
Loss = 6.5586e-03, PNorm = 146.8318, GNorm = 0.3554, lr_0 = 3.5261e-04
Loss = 5.4907e-03, PNorm = 146.8462, GNorm = 0.1761, lr_0 = 3.5237e-04
Loss = 6.2873e-03, PNorm = 146.8618, GNorm = 0.2238, lr_0 = 3.5212e-04
Loss = 5.4171e-03, PNorm = 146.8789, GNorm = 0.1934, lr_0 = 3.5188e-04
Loss = 6.7664e-03, PNorm = 146.8936, GNorm = 0.3618, lr_0 = 3.5164e-04
Loss = 7.2822e-03, PNorm = 146.9059, GNorm = 0.1252, lr_0 = 3.5140e-04
Loss = 4.8428e-03, PNorm = 146.9215, GNorm = 0.3290, lr_0 = 3.5116e-04
Loss = 4.8487e-03, PNorm = 146.9378, GNorm = 0.3523, lr_0 = 3.5092e-04
Loss = 5.7473e-03, PNorm = 146.9521, GNorm = 0.3065, lr_0 = 3.5068e-04
Loss = 5.8032e-03, PNorm = 146.9656, GNorm = 0.3799, lr_0 = 3.5044e-04
Loss = 6.4807e-03, PNorm = 146.9808, GNorm = 0.4615, lr_0 = 3.5020e-04
Loss = 7.4261e-03, PNorm = 146.9959, GNorm = 0.5037, lr_0 = 3.4996e-04
Loss = 5.2714e-03, PNorm = 147.0116, GNorm = 0.3977, lr_0 = 3.4972e-04
Loss = 6.1960e-03, PNorm = 147.0239, GNorm = 0.3077, lr_0 = 3.4948e-04
Loss = 5.2490e-03, PNorm = 147.0410, GNorm = 0.3514, lr_0 = 3.4924e-04
Loss = 6.1910e-03, PNorm = 147.0490, GNorm = 0.1743, lr_0 = 3.4900e-04
Loss = 6.6049e-03, PNorm = 147.0607, GNorm = 0.2025, lr_0 = 3.4876e-04
Loss = 5.5510e-03, PNorm = 147.0785, GNorm = 0.2042, lr_0 = 3.4852e-04
Loss = 5.7636e-03, PNorm = 147.0958, GNorm = 0.2015, lr_0 = 3.4828e-04
Loss = 5.1308e-03, PNorm = 147.1115, GNorm = 0.2504, lr_0 = 3.4805e-04
Loss = 5.3442e-03, PNorm = 147.1272, GNorm = 0.2577, lr_0 = 3.4781e-04
Loss = 6.9461e-03, PNorm = 147.1435, GNorm = 0.2610, lr_0 = 3.4757e-04
Loss = 7.0232e-03, PNorm = 147.1583, GNorm = 0.0824, lr_0 = 3.4733e-04
Loss = 7.3928e-03, PNorm = 147.1759, GNorm = 0.2558, lr_0 = 3.4709e-04
Loss = 5.7952e-03, PNorm = 147.1857, GNorm = 0.2491, lr_0 = 3.4686e-04
Loss = 5.9733e-03, PNorm = 147.1946, GNorm = 0.4514, lr_0 = 3.4662e-04
Loss = 5.2946e-03, PNorm = 147.2091, GNorm = 0.1850, lr_0 = 3.4638e-04
Loss = 6.4509e-03, PNorm = 147.2228, GNorm = 0.0691, lr_0 = 3.4614e-04
Loss = 4.7879e-03, PNorm = 147.2345, GNorm = 0.2032, lr_0 = 3.4591e-04
Loss = 5.2861e-03, PNorm = 147.2457, GNorm = 0.1598, lr_0 = 3.4567e-04
Loss = 5.4132e-03, PNorm = 147.2575, GNorm = 0.1899, lr_0 = 3.4543e-04
Loss = 5.2347e-03, PNorm = 147.2687, GNorm = 0.1081, lr_0 = 3.4520e-04
Loss = 6.2555e-03, PNorm = 147.2857, GNorm = 0.1889, lr_0 = 3.4496e-04
Loss = 9.1433e-03, PNorm = 147.3039, GNorm = 0.3118, lr_0 = 3.4472e-04
Loss = 6.7728e-03, PNorm = 147.3213, GNorm = 0.1071, lr_0 = 3.4449e-04
Loss = 7.1898e-03, PNorm = 147.3359, GNorm = 1.0107, lr_0 = 3.4425e-04
Loss = 4.9262e-03, PNorm = 147.3497, GNorm = 0.3882, lr_0 = 3.4402e-04
Loss = 8.0674e-03, PNorm = 147.3654, GNorm = 0.2103, lr_0 = 3.4378e-04
Loss = 6.6548e-03, PNorm = 147.3815, GNorm = 0.2654, lr_0 = 3.4354e-04
Loss = 4.8513e-03, PNorm = 147.3924, GNorm = 0.0731, lr_0 = 3.4331e-04
Validation mae = 0.478921
Epoch 15
Loss = 6.3625e-03, PNorm = 147.4040, GNorm = 0.2629, lr_0 = 3.4307e-04
Loss = 4.7598e-03, PNorm = 147.4153, GNorm = 0.1348, lr_0 = 3.4284e-04
Loss = 4.3675e-03, PNorm = 147.4281, GNorm = 0.1266, lr_0 = 3.4260e-04
Loss = 7.3049e-03, PNorm = 147.4365, GNorm = 0.1869, lr_0 = 3.4237e-04
Loss = 4.8969e-03, PNorm = 147.4463, GNorm = 0.1779, lr_0 = 3.4213e-04
Loss = 4.6967e-03, PNorm = 147.4554, GNorm = 0.1784, lr_0 = 3.4190e-04
Loss = 4.5234e-03, PNorm = 147.4677, GNorm = 0.2859, lr_0 = 3.4167e-04
Loss = 4.7982e-03, PNorm = 147.4803, GNorm = 0.1324, lr_0 = 3.4143e-04
Loss = 4.7298e-03, PNorm = 147.4952, GNorm = 0.0815, lr_0 = 3.4120e-04
Loss = 7.8137e-03, PNorm = 147.5053, GNorm = 0.1326, lr_0 = 3.4096e-04
Loss = 4.7215e-03, PNorm = 147.5169, GNorm = 0.2084, lr_0 = 3.4073e-04
Loss = 6.0762e-03, PNorm = 147.5287, GNorm = 0.2436, lr_0 = 3.4050e-04
Loss = 5.7025e-03, PNorm = 147.5378, GNorm = 0.2022, lr_0 = 3.4026e-04
Loss = 6.7533e-03, PNorm = 147.5474, GNorm = 0.0794, lr_0 = 3.4003e-04
Loss = 6.4725e-03, PNorm = 147.5600, GNorm = 0.0991, lr_0 = 3.3980e-04
Loss = 5.8865e-03, PNorm = 147.5716, GNorm = 0.2022, lr_0 = 3.3956e-04
Loss = 4.8666e-03, PNorm = 147.5777, GNorm = 0.2467, lr_0 = 3.3933e-04
Loss = 6.2365e-03, PNorm = 147.5860, GNorm = 0.1846, lr_0 = 3.3910e-04
Loss = 4.3226e-03, PNorm = 147.5949, GNorm = 0.0989, lr_0 = 3.3887e-04
Loss = 4.9126e-03, PNorm = 147.6009, GNorm = 0.0959, lr_0 = 3.3864e-04
Loss = 6.8625e-03, PNorm = 147.6084, GNorm = 0.1819, lr_0 = 3.3840e-04
Loss = 5.0011e-03, PNorm = 147.6171, GNorm = 0.1084, lr_0 = 3.3817e-04
Loss = 4.4708e-03, PNorm = 147.6249, GNorm = 0.3249, lr_0 = 3.3794e-04
Loss = 4.9112e-03, PNorm = 147.6299, GNorm = 0.0753, lr_0 = 3.3771e-04
Loss = 7.2395e-03, PNorm = 147.6414, GNorm = 0.2380, lr_0 = 3.3748e-04
Loss = 7.4405e-03, PNorm = 147.6532, GNorm = 0.1542, lr_0 = 3.3725e-04
Loss = 4.8577e-03, PNorm = 147.6643, GNorm = 0.0802, lr_0 = 3.3701e-04
Loss = 5.2251e-03, PNorm = 147.6822, GNorm = 0.1512, lr_0 = 3.3678e-04
Loss = 5.5583e-03, PNorm = 147.6958, GNorm = 0.4080, lr_0 = 3.3655e-04
Loss = 4.5671e-03, PNorm = 147.7062, GNorm = 0.1125, lr_0 = 3.3632e-04
Loss = 5.9747e-03, PNorm = 147.7152, GNorm = 0.1100, lr_0 = 3.3609e-04
Loss = 5.2684e-03, PNorm = 147.7269, GNorm = 0.1058, lr_0 = 3.3586e-04
Loss = 4.2042e-03, PNorm = 147.7376, GNorm = 0.0766, lr_0 = 3.3563e-04
Loss = 4.6802e-03, PNorm = 147.7475, GNorm = 0.2277, lr_0 = 3.3540e-04
Loss = 4.4542e-03, PNorm = 147.7562, GNorm = 0.0585, lr_0 = 3.3517e-04
Loss = 4.6119e-03, PNorm = 147.7695, GNorm = 0.1237, lr_0 = 3.3494e-04
Loss = 4.4950e-03, PNorm = 147.7806, GNorm = 0.1054, lr_0 = 3.3471e-04
Loss = 4.8829e-03, PNorm = 147.7910, GNorm = 0.1568, lr_0 = 3.3448e-04
Loss = 5.5871e-03, PNorm = 147.8036, GNorm = 0.1595, lr_0 = 3.3425e-04
Loss = 5.0853e-03, PNorm = 147.8160, GNorm = 0.3656, lr_0 = 3.3403e-04
Loss = 5.0258e-03, PNorm = 147.8253, GNorm = 0.2291, lr_0 = 3.3380e-04
Loss = 4.9776e-03, PNorm = 147.8370, GNorm = 0.1491, lr_0 = 3.3357e-04
Loss = 5.3904e-03, PNorm = 147.8515, GNorm = 0.0758, lr_0 = 3.3334e-04
Loss = 5.6380e-03, PNorm = 147.8650, GNorm = 0.1098, lr_0 = 3.3311e-04
Loss = 6.8387e-03, PNorm = 147.8766, GNorm = 0.4067, lr_0 = 3.3288e-04
Loss = 6.2511e-03, PNorm = 147.8846, GNorm = 0.1026, lr_0 = 3.3265e-04
Loss = 7.3589e-03, PNorm = 147.8970, GNorm = 0.2139, lr_0 = 3.3243e-04
Loss = 6.4435e-03, PNorm = 147.9110, GNorm = 0.3725, lr_0 = 3.3220e-04
Loss = 5.6057e-03, PNorm = 147.9243, GNorm = 0.1180, lr_0 = 3.3197e-04
Loss = 5.9277e-03, PNorm = 147.9343, GNorm = 0.1210, lr_0 = 3.3174e-04
Loss = 4.7833e-03, PNorm = 147.9478, GNorm = 0.1285, lr_0 = 3.3152e-04
Loss = 4.2052e-03, PNorm = 147.9630, GNorm = 0.0854, lr_0 = 3.3129e-04
Loss = 4.7191e-03, PNorm = 147.9720, GNorm = 0.0964, lr_0 = 3.3106e-04
Loss = 5.6846e-03, PNorm = 147.9852, GNorm = 0.1210, lr_0 = 3.3084e-04
Loss = 7.0644e-03, PNorm = 147.9929, GNorm = 0.3796, lr_0 = 3.3061e-04
Loss = 5.4817e-03, PNorm = 148.0011, GNorm = 0.1692, lr_0 = 3.3038e-04
Loss = 4.7818e-03, PNorm = 148.0094, GNorm = 0.1387, lr_0 = 3.3016e-04
Loss = 6.0733e-03, PNorm = 148.0217, GNorm = 0.1304, lr_0 = 3.2993e-04
Loss = 4.3966e-03, PNorm = 148.0382, GNorm = 0.1445, lr_0 = 3.2970e-04
Loss = 5.0895e-03, PNorm = 148.0499, GNorm = 0.0730, lr_0 = 3.2948e-04
Loss = 5.1147e-03, PNorm = 148.0613, GNorm = 0.4204, lr_0 = 3.2925e-04
Loss = 4.7787e-03, PNorm = 148.0720, GNorm = 0.1343, lr_0 = 3.2903e-04
Loss = 5.7241e-03, PNorm = 148.0842, GNorm = 0.3838, lr_0 = 3.2880e-04
Loss = 5.3343e-03, PNorm = 148.0951, GNorm = 0.2640, lr_0 = 3.2858e-04
Loss = 5.1744e-03, PNorm = 148.1124, GNorm = 0.2041, lr_0 = 3.2835e-04
Loss = 4.4677e-03, PNorm = 148.1248, GNorm = 0.1194, lr_0 = 3.2813e-04
Loss = 7.2170e-03, PNorm = 148.1390, GNorm = 0.4500, lr_0 = 3.2790e-04
Loss = 6.7093e-03, PNorm = 148.1502, GNorm = 0.3182, lr_0 = 3.2768e-04
Loss = 6.1082e-03, PNorm = 148.1620, GNorm = 0.1191, lr_0 = 3.2745e-04
Loss = 5.0767e-03, PNorm = 148.1776, GNorm = 0.3801, lr_0 = 3.2723e-04
Loss = 4.9024e-03, PNorm = 148.1895, GNorm = 0.2012, lr_0 = 3.2700e-04
Loss = 5.8059e-03, PNorm = 148.1994, GNorm = 0.1495, lr_0 = 3.2678e-04
Loss = 5.2395e-03, PNorm = 148.2124, GNorm = 0.7120, lr_0 = 3.2656e-04
Loss = 5.2121e-03, PNorm = 148.2251, GNorm = 0.4632, lr_0 = 3.2633e-04
Loss = 6.4877e-03, PNorm = 148.2393, GNorm = 0.1248, lr_0 = 3.2611e-04
Loss = 5.2356e-03, PNorm = 148.2530, GNorm = 0.4372, lr_0 = 3.2589e-04
Loss = 7.5204e-03, PNorm = 148.2662, GNorm = 0.2625, lr_0 = 3.2566e-04
Loss = 6.1719e-03, PNorm = 148.2799, GNorm = 0.1103, lr_0 = 3.2544e-04
Loss = 4.0549e-03, PNorm = 148.2886, GNorm = 0.2373, lr_0 = 3.2522e-04
Loss = 7.0644e-03, PNorm = 148.2979, GNorm = 0.2056, lr_0 = 3.2499e-04
Loss = 4.1113e-03, PNorm = 148.3109, GNorm = 0.1324, lr_0 = 3.2477e-04
Loss = 5.4941e-03, PNorm = 148.3230, GNorm = 0.4279, lr_0 = 3.2455e-04
Loss = 5.1394e-03, PNorm = 148.3401, GNorm = 0.3107, lr_0 = 3.2433e-04
Loss = 4.5606e-03, PNorm = 148.3530, GNorm = 0.2175, lr_0 = 3.2410e-04
Loss = 4.4918e-03, PNorm = 148.3651, GNorm = 0.2455, lr_0 = 3.2388e-04
Loss = 5.1301e-03, PNorm = 148.3751, GNorm = 0.2186, lr_0 = 3.2366e-04
Loss = 4.4421e-03, PNorm = 148.3843, GNorm = 0.0619, lr_0 = 3.2344e-04
Loss = 4.7898e-03, PNorm = 148.3940, GNorm = 0.4246, lr_0 = 3.2322e-04
Loss = 4.4853e-03, PNorm = 148.4052, GNorm = 0.2275, lr_0 = 3.2300e-04
Loss = 5.4793e-03, PNorm = 148.4153, GNorm = 0.1050, lr_0 = 3.2277e-04
Loss = 4.4956e-03, PNorm = 148.4229, GNorm = 0.0760, lr_0 = 3.2255e-04
Loss = 4.0588e-03, PNorm = 148.4314, GNorm = 0.1444, lr_0 = 3.2233e-04
Loss = 4.6089e-03, PNorm = 148.4442, GNorm = 0.0910, lr_0 = 3.2211e-04
Loss = 4.0349e-03, PNorm = 148.4561, GNorm = 0.1640, lr_0 = 3.2189e-04
Loss = 6.8049e-03, PNorm = 148.4698, GNorm = 0.8621, lr_0 = 3.2167e-04
Loss = 3.7472e-03, PNorm = 148.4813, GNorm = 0.1959, lr_0 = 3.2145e-04
Loss = 5.3616e-03, PNorm = 148.4917, GNorm = 0.3084, lr_0 = 3.2123e-04
Loss = 4.8469e-03, PNorm = 148.5057, GNorm = 0.2149, lr_0 = 3.2101e-04
Loss = 4.3263e-03, PNorm = 148.5178, GNorm = 0.2237, lr_0 = 3.2079e-04
Loss = 4.5567e-03, PNorm = 148.5278, GNorm = 0.2662, lr_0 = 3.2057e-04
Loss = 4.6430e-03, PNorm = 148.5364, GNorm = 0.1129, lr_0 = 3.2035e-04
Loss = 5.9731e-03, PNorm = 148.5477, GNorm = 0.0948, lr_0 = 3.2013e-04
Loss = 9.1735e-03, PNorm = 148.5591, GNorm = 0.1972, lr_0 = 3.1991e-04
Loss = 4.1210e-03, PNorm = 148.5693, GNorm = 0.4043, lr_0 = 3.1969e-04
Loss = 6.1801e-03, PNorm = 148.5839, GNorm = 0.3376, lr_0 = 3.1947e-04
Loss = 5.0043e-03, PNorm = 148.5994, GNorm = 0.1858, lr_0 = 3.1925e-04
Loss = 5.0207e-03, PNorm = 148.6147, GNorm = 0.3045, lr_0 = 3.1904e-04
Loss = 5.3334e-03, PNorm = 148.6216, GNorm = 0.4440, lr_0 = 3.1882e-04
Loss = 5.4109e-03, PNorm = 148.6317, GNorm = 0.1042, lr_0 = 3.1860e-04
Loss = 5.5022e-03, PNorm = 148.6455, GNorm = 0.0617, lr_0 = 3.1838e-04
Loss = 4.5481e-03, PNorm = 148.6625, GNorm = 0.0747, lr_0 = 3.1816e-04
Loss = 5.3362e-03, PNorm = 148.6750, GNorm = 0.1041, lr_0 = 3.1794e-04
Loss = 7.4033e-03, PNorm = 148.6891, GNorm = 0.1683, lr_0 = 3.1773e-04
Loss = 6.1978e-03, PNorm = 148.7057, GNorm = 0.1286, lr_0 = 3.1751e-04
Loss = 5.7982e-03, PNorm = 148.7186, GNorm = 0.0936, lr_0 = 3.1729e-04
Loss = 4.1904e-03, PNorm = 148.7282, GNorm = 0.0793, lr_0 = 3.1707e-04
Loss = 5.9933e-03, PNorm = 148.7409, GNorm = 0.2015, lr_0 = 3.1686e-04
Loss = 5.9479e-03, PNorm = 148.7530, GNorm = 0.1799, lr_0 = 3.1664e-04
Loss = 3.9787e-03, PNorm = 148.7627, GNorm = 0.1642, lr_0 = 3.1642e-04
Loss = 4.8644e-03, PNorm = 148.7739, GNorm = 0.0601, lr_0 = 3.1621e-04
Validation mae = 0.477302
Epoch 16
Loss = 4.6900e-03, PNorm = 148.7811, GNorm = 0.1644, lr_0 = 3.1599e-04
Loss = 3.9386e-03, PNorm = 148.7871, GNorm = 0.0685, lr_0 = 3.1577e-04
Loss = 4.7189e-03, PNorm = 148.7961, GNorm = 0.1233, lr_0 = 3.1556e-04
Loss = 5.1980e-03, PNorm = 148.8048, GNorm = 0.1190, lr_0 = 3.1534e-04
Loss = 4.1443e-03, PNorm = 148.8122, GNorm = 0.1191, lr_0 = 3.1512e-04
Loss = 4.1856e-03, PNorm = 148.8146, GNorm = 0.2743, lr_0 = 3.1491e-04
Loss = 4.9327e-03, PNorm = 148.8231, GNorm = 0.0712, lr_0 = 3.1469e-04
Loss = 4.5252e-03, PNorm = 148.8320, GNorm = 0.1173, lr_0 = 3.1448e-04
Loss = 3.7855e-03, PNorm = 148.8426, GNorm = 0.1535, lr_0 = 3.1426e-04
Loss = 4.5422e-03, PNorm = 148.8530, GNorm = 0.1099, lr_0 = 3.1405e-04
Loss = 3.9162e-03, PNorm = 148.8629, GNorm = 0.1323, lr_0 = 3.1383e-04
Loss = 5.0703e-03, PNorm = 148.8694, GNorm = 0.1518, lr_0 = 3.1362e-04
Loss = 4.3406e-03, PNorm = 148.8818, GNorm = 0.0933, lr_0 = 3.1340e-04
Loss = 4.0440e-03, PNorm = 148.8925, GNorm = 0.2018, lr_0 = 3.1319e-04
Loss = 4.5809e-03, PNorm = 148.9005, GNorm = 0.1051, lr_0 = 3.1297e-04
Loss = 4.3645e-03, PNorm = 148.9098, GNorm = 0.1702, lr_0 = 3.1276e-04
Loss = 3.9240e-03, PNorm = 148.9177, GNorm = 0.2871, lr_0 = 3.1254e-04
Loss = 3.4955e-03, PNorm = 148.9274, GNorm = 0.1049, lr_0 = 3.1233e-04
Loss = 3.8532e-03, PNorm = 148.9356, GNorm = 0.1743, lr_0 = 3.1212e-04
Loss = 5.6967e-03, PNorm = 148.9426, GNorm = 0.2947, lr_0 = 3.1190e-04
Loss = 3.8141e-03, PNorm = 148.9529, GNorm = 0.3524, lr_0 = 3.1169e-04
Loss = 4.2073e-03, PNorm = 148.9630, GNorm = 0.2093, lr_0 = 3.1147e-04
Loss = 3.9839e-03, PNorm = 148.9703, GNorm = 0.1059, lr_0 = 3.1126e-04
Loss = 5.0995e-03, PNorm = 148.9807, GNorm = 0.1968, lr_0 = 3.1105e-04
Loss = 4.4716e-03, PNorm = 148.9889, GNorm = 0.5219, lr_0 = 3.1083e-04
Loss = 5.4899e-03, PNorm = 148.9973, GNorm = 0.2112, lr_0 = 3.1062e-04
Loss = 3.9686e-03, PNorm = 149.0028, GNorm = 0.1689, lr_0 = 3.1041e-04
Loss = 4.5803e-03, PNorm = 149.0116, GNorm = 0.4126, lr_0 = 3.1020e-04
Loss = 4.9512e-03, PNorm = 149.0231, GNorm = 0.3069, lr_0 = 3.0998e-04
Loss = 5.4038e-03, PNorm = 149.0324, GNorm = 0.1593, lr_0 = 3.0977e-04
Loss = 3.8217e-03, PNorm = 149.0403, GNorm = 0.2893, lr_0 = 3.0956e-04
Loss = 4.5744e-03, PNorm = 149.0469, GNorm = 0.3713, lr_0 = 3.0935e-04
Loss = 5.8348e-03, PNorm = 149.0590, GNorm = 0.2968, lr_0 = 3.0914e-04
Loss = 5.5289e-03, PNorm = 149.0673, GNorm = 0.1037, lr_0 = 3.0892e-04
Loss = 5.0552e-03, PNorm = 149.0757, GNorm = 0.0906, lr_0 = 3.0871e-04
Loss = 4.1015e-03, PNorm = 149.0851, GNorm = 0.1712, lr_0 = 3.0850e-04
Loss = 5.0920e-03, PNorm = 149.0930, GNorm = 0.0817, lr_0 = 3.0829e-04
Loss = 4.4048e-03, PNorm = 149.1054, GNorm = 0.1204, lr_0 = 3.0808e-04
Loss = 4.1905e-03, PNorm = 149.1161, GNorm = 0.1255, lr_0 = 3.0787e-04
Loss = 5.0115e-03, PNorm = 149.1246, GNorm = 0.2438, lr_0 = 3.0766e-04
Loss = 4.7213e-03, PNorm = 149.1316, GNorm = 0.1699, lr_0 = 3.0745e-04
Loss = 3.8338e-03, PNorm = 149.1406, GNorm = 0.1269, lr_0 = 3.0723e-04
Loss = 4.4051e-03, PNorm = 149.1513, GNorm = 0.1743, lr_0 = 3.0702e-04
Loss = 6.2493e-03, PNorm = 149.1616, GNorm = 0.4865, lr_0 = 3.0681e-04
Loss = 3.6733e-03, PNorm = 149.1714, GNorm = 0.2528, lr_0 = 3.0660e-04
Loss = 4.0491e-03, PNorm = 149.1800, GNorm = 0.1514, lr_0 = 3.0639e-04
Loss = 4.0427e-03, PNorm = 149.1905, GNorm = 0.1297, lr_0 = 3.0618e-04
Loss = 3.6243e-03, PNorm = 149.1979, GNorm = 0.1578, lr_0 = 3.0597e-04
Loss = 8.8571e-03, PNorm = 149.2068, GNorm = 0.2490, lr_0 = 3.0576e-04
Loss = 3.6784e-03, PNorm = 149.2183, GNorm = 0.1733, lr_0 = 3.0555e-04
Loss = 5.3805e-03, PNorm = 149.2300, GNorm = 0.2751, lr_0 = 3.0535e-04
Loss = 3.7823e-03, PNorm = 149.2417, GNorm = 0.0693, lr_0 = 3.0514e-04
Loss = 4.6568e-03, PNorm = 149.2523, GNorm = 0.1293, lr_0 = 3.0493e-04
Loss = 4.2755e-03, PNorm = 149.2608, GNorm = 0.0712, lr_0 = 3.0472e-04
Loss = 3.8709e-03, PNorm = 149.2669, GNorm = 0.2639, lr_0 = 3.0451e-04
Loss = 5.9304e-03, PNorm = 149.2766, GNorm = 0.2034, lr_0 = 3.0430e-04
Loss = 3.9718e-03, PNorm = 149.2881, GNorm = 0.0863, lr_0 = 3.0409e-04
Loss = 5.4213e-03, PNorm = 149.2984, GNorm = 0.2540, lr_0 = 3.0388e-04
Loss = 3.6776e-03, PNorm = 149.3102, GNorm = 0.1366, lr_0 = 3.0368e-04
Loss = 4.1111e-03, PNorm = 149.3209, GNorm = 0.1280, lr_0 = 3.0347e-04
Loss = 5.7653e-03, PNorm = 149.3312, GNorm = 0.2607, lr_0 = 3.0326e-04
Loss = 4.9169e-03, PNorm = 149.3386, GNorm = 0.2646, lr_0 = 3.0305e-04
Loss = 4.0887e-03, PNorm = 149.3493, GNorm = 0.0538, lr_0 = 3.0284e-04
Loss = 4.7099e-03, PNorm = 149.3627, GNorm = 0.2328, lr_0 = 3.0264e-04
Loss = 5.6249e-03, PNorm = 149.3715, GNorm = 0.1725, lr_0 = 3.0243e-04
Loss = 3.9278e-03, PNorm = 149.3773, GNorm = 0.1800, lr_0 = 3.0222e-04
Loss = 4.0664e-03, PNorm = 149.3861, GNorm = 0.0555, lr_0 = 3.0202e-04
Loss = 6.3006e-03, PNorm = 149.3980, GNorm = 0.0673, lr_0 = 3.0181e-04
Loss = 4.0248e-03, PNorm = 149.4094, GNorm = 0.1739, lr_0 = 3.0160e-04
Loss = 3.5754e-03, PNorm = 149.4180, GNorm = 0.0988, lr_0 = 3.0140e-04
Loss = 4.2437e-03, PNorm = 149.4306, GNorm = 0.2111, lr_0 = 3.0119e-04
Loss = 4.3363e-03, PNorm = 149.4382, GNorm = 0.1296, lr_0 = 3.0098e-04
Loss = 4.2937e-03, PNorm = 149.4452, GNorm = 0.1016, lr_0 = 3.0078e-04
Loss = 4.8484e-03, PNorm = 149.4555, GNorm = 0.0633, lr_0 = 3.0057e-04
Loss = 4.0529e-03, PNorm = 149.4670, GNorm = 0.2174, lr_0 = 3.0036e-04
Loss = 4.1296e-03, PNorm = 149.4776, GNorm = 0.1832, lr_0 = 3.0016e-04
Loss = 4.4746e-03, PNorm = 149.4857, GNorm = 0.2674, lr_0 = 2.9995e-04
Loss = 4.8942e-03, PNorm = 149.4925, GNorm = 0.3196, lr_0 = 2.9975e-04
Loss = 6.3935e-03, PNorm = 149.5007, GNorm = 0.1647, lr_0 = 2.9954e-04
Loss = 4.3331e-03, PNorm = 149.5122, GNorm = 0.2285, lr_0 = 2.9934e-04
Loss = 6.5505e-03, PNorm = 149.5269, GNorm = 0.3923, lr_0 = 2.9913e-04
Loss = 5.3561e-03, PNorm = 149.5372, GNorm = 0.0931, lr_0 = 2.9893e-04
Loss = 4.5101e-03, PNorm = 149.5469, GNorm = 0.1141, lr_0 = 2.9872e-04
Loss = 4.4303e-03, PNorm = 149.5547, GNorm = 0.1687, lr_0 = 2.9852e-04
Loss = 3.5380e-03, PNorm = 149.5629, GNorm = 0.0880, lr_0 = 2.9831e-04
Loss = 3.7425e-03, PNorm = 149.5703, GNorm = 0.2445, lr_0 = 2.9811e-04
Loss = 3.8604e-03, PNorm = 149.5765, GNorm = 0.0999, lr_0 = 2.9790e-04
Loss = 4.2379e-03, PNorm = 149.5869, GNorm = 0.1488, lr_0 = 2.9770e-04
Loss = 3.7337e-03, PNorm = 149.5986, GNorm = 0.1727, lr_0 = 2.9750e-04
Loss = 4.4513e-03, PNorm = 149.6082, GNorm = 0.0708, lr_0 = 2.9729e-04
Loss = 4.4099e-03, PNorm = 149.6189, GNorm = 0.2285, lr_0 = 2.9709e-04
Loss = 4.3616e-03, PNorm = 149.6290, GNorm = 0.0913, lr_0 = 2.9689e-04
Loss = 4.7855e-03, PNorm = 149.6392, GNorm = 0.2697, lr_0 = 2.9668e-04
Loss = 5.4431e-03, PNorm = 149.6519, GNorm = 0.1255, lr_0 = 2.9648e-04
Loss = 3.6165e-03, PNorm = 149.6639, GNorm = 0.0761, lr_0 = 2.9628e-04
Loss = 3.4894e-03, PNorm = 149.6761, GNorm = 0.0541, lr_0 = 2.9607e-04
Loss = 4.5358e-03, PNorm = 149.6878, GNorm = 0.1586, lr_0 = 2.9587e-04
Loss = 5.1972e-03, PNorm = 149.6967, GNorm = 0.1159, lr_0 = 2.9567e-04
Loss = 4.5928e-03, PNorm = 149.7075, GNorm = 0.1177, lr_0 = 2.9546e-04
Loss = 8.2306e-03, PNorm = 149.7173, GNorm = 0.1218, lr_0 = 2.9526e-04
Loss = 4.8666e-03, PNorm = 149.7283, GNorm = 0.1970, lr_0 = 2.9506e-04
Loss = 6.6950e-03, PNorm = 149.7383, GNorm = 0.1812, lr_0 = 2.9486e-04
Loss = 4.6876e-03, PNorm = 149.7510, GNorm = 0.2091, lr_0 = 2.9466e-04
Loss = 4.9173e-03, PNorm = 149.7605, GNorm = 0.3038, lr_0 = 2.9445e-04
Loss = 3.7845e-03, PNorm = 149.7685, GNorm = 0.2639, lr_0 = 2.9425e-04
Loss = 4.5449e-03, PNorm = 149.7761, GNorm = 0.2630, lr_0 = 2.9405e-04
Loss = 4.2915e-03, PNorm = 149.7897, GNorm = 0.2874, lr_0 = 2.9385e-04
Loss = 3.6104e-03, PNorm = 149.7985, GNorm = 0.0992, lr_0 = 2.9365e-04
Loss = 3.6293e-03, PNorm = 149.8047, GNorm = 0.2068, lr_0 = 2.9345e-04
Loss = 4.4488e-03, PNorm = 149.8139, GNorm = 0.2818, lr_0 = 2.9325e-04
Loss = 3.7422e-03, PNorm = 149.8239, GNorm = 0.2552, lr_0 = 2.9305e-04
Loss = 4.7201e-03, PNorm = 149.8322, GNorm = 0.0711, lr_0 = 2.9284e-04
Loss = 4.7368e-03, PNorm = 149.8406, GNorm = 0.3380, lr_0 = 2.9264e-04
Loss = 4.2322e-03, PNorm = 149.8483, GNorm = 0.0601, lr_0 = 2.9244e-04
Loss = 6.7731e-03, PNorm = 149.8546, GNorm = 0.0829, lr_0 = 2.9224e-04
Loss = 4.4710e-03, PNorm = 149.8645, GNorm = 0.0805, lr_0 = 2.9204e-04
Loss = 3.8727e-03, PNorm = 149.8763, GNorm = 0.0817, lr_0 = 2.9184e-04
Loss = 4.2796e-03, PNorm = 149.8860, GNorm = 0.1478, lr_0 = 2.9164e-04
Loss = 5.8389e-03, PNorm = 149.8943, GNorm = 0.1443, lr_0 = 2.9144e-04
Loss = 4.1755e-03, PNorm = 149.9048, GNorm = 0.1102, lr_0 = 2.9124e-04
Validation mae = 0.476436
Epoch 17
Loss = 3.2101e-03, PNorm = 149.9141, GNorm = 0.1885, lr_0 = 2.9104e-04
Loss = 4.5885e-03, PNorm = 149.9247, GNorm = 0.3852, lr_0 = 2.9084e-04
Loss = 4.6598e-03, PNorm = 149.9300, GNorm = 0.1616, lr_0 = 2.9065e-04
Loss = 3.8032e-03, PNorm = 149.9350, GNorm = 0.2405, lr_0 = 2.9045e-04
Loss = 6.3244e-03, PNorm = 149.9445, GNorm = 0.1652, lr_0 = 2.9025e-04
Loss = 4.3952e-03, PNorm = 149.9532, GNorm = 0.2337, lr_0 = 2.9005e-04
Loss = 3.9362e-03, PNorm = 149.9627, GNorm = 0.2294, lr_0 = 2.8985e-04
Loss = 3.9899e-03, PNorm = 149.9720, GNorm = 0.2239, lr_0 = 2.8965e-04
Loss = 3.0468e-03, PNorm = 149.9818, GNorm = 0.1662, lr_0 = 2.8945e-04
Loss = 4.6835e-03, PNorm = 149.9893, GNorm = 0.1252, lr_0 = 2.8925e-04
Loss = 3.9520e-03, PNorm = 149.9957, GNorm = 0.1392, lr_0 = 2.8906e-04
Loss = 4.0426e-03, PNorm = 150.0070, GNorm = 0.3746, lr_0 = 2.8886e-04
Loss = 4.6831e-03, PNorm = 150.0166, GNorm = 0.3535, lr_0 = 2.8866e-04
Loss = 3.7788e-03, PNorm = 150.0247, GNorm = 0.1636, lr_0 = 2.8846e-04
Loss = 3.6167e-03, PNorm = 150.0361, GNorm = 0.2475, lr_0 = 2.8826e-04
Loss = 3.8923e-03, PNorm = 150.0396, GNorm = 0.1626, lr_0 = 2.8807e-04
Loss = 4.5766e-03, PNorm = 150.0440, GNorm = 0.1356, lr_0 = 2.8787e-04
Loss = 3.5922e-03, PNorm = 150.0487, GNorm = 0.0999, lr_0 = 2.8767e-04
Loss = 3.3962e-03, PNorm = 150.0554, GNorm = 0.1149, lr_0 = 2.8748e-04
Loss = 3.3472e-03, PNorm = 150.0660, GNorm = 0.1196, lr_0 = 2.8728e-04
Loss = 3.4359e-03, PNorm = 150.0752, GNorm = 0.1391, lr_0 = 2.8708e-04
Loss = 6.8750e-03, PNorm = 150.0818, GNorm = 0.2358, lr_0 = 2.8689e-04
Loss = 4.1863e-03, PNorm = 150.0917, GNorm = 0.1217, lr_0 = 2.8669e-04
Loss = 3.3622e-03, PNorm = 150.1008, GNorm = 0.0962, lr_0 = 2.8649e-04
Loss = 4.0369e-03, PNorm = 150.1085, GNorm = 0.3028, lr_0 = 2.8630e-04
Loss = 3.8548e-03, PNorm = 150.1128, GNorm = 0.3105, lr_0 = 2.8610e-04
Loss = 4.5790e-03, PNorm = 150.1190, GNorm = 0.3186, lr_0 = 2.8590e-04
Loss = 5.0759e-03, PNorm = 150.1311, GNorm = 0.2940, lr_0 = 2.8571e-04
Loss = 3.5752e-03, PNorm = 150.1396, GNorm = 0.0929, lr_0 = 2.8551e-04
Loss = 3.3651e-03, PNorm = 150.1485, GNorm = 0.3094, lr_0 = 2.8532e-04
Loss = 3.5958e-03, PNorm = 150.1570, GNorm = 0.1782, lr_0 = 2.8512e-04
Loss = 4.2872e-03, PNorm = 150.1636, GNorm = 0.1083, lr_0 = 2.8493e-04
Loss = 3.7725e-03, PNorm = 150.1685, GNorm = 0.1632, lr_0 = 2.8473e-04
Loss = 4.0461e-03, PNorm = 150.1748, GNorm = 0.2213, lr_0 = 2.8454e-04
Loss = 3.4845e-03, PNorm = 150.1817, GNorm = 0.2220, lr_0 = 2.8434e-04
Loss = 2.7193e-03, PNorm = 150.1914, GNorm = 0.0621, lr_0 = 2.8415e-04
Loss = 3.4978e-03, PNorm = 150.1999, GNorm = 0.2386, lr_0 = 2.8395e-04
Loss = 6.5736e-03, PNorm = 150.2114, GNorm = 0.1633, lr_0 = 2.8376e-04
Loss = 4.9073e-03, PNorm = 150.2179, GNorm = 0.3184, lr_0 = 2.8356e-04
Loss = 3.0758e-03, PNorm = 150.2255, GNorm = 0.1866, lr_0 = 2.8337e-04
Loss = 4.6028e-03, PNorm = 150.2314, GNorm = 0.1328, lr_0 = 2.8317e-04
Loss = 4.8528e-03, PNorm = 150.2339, GNorm = 0.1473, lr_0 = 2.8298e-04
Loss = 3.7067e-03, PNorm = 150.2399, GNorm = 0.3226, lr_0 = 2.8279e-04
Loss = 3.7185e-03, PNorm = 150.2511, GNorm = 0.1904, lr_0 = 2.8259e-04
Loss = 2.9919e-03, PNorm = 150.2616, GNorm = 0.1019, lr_0 = 2.8240e-04
Loss = 2.9373e-03, PNorm = 150.2710, GNorm = 0.2466, lr_0 = 2.8221e-04
Loss = 4.0250e-03, PNorm = 150.2766, GNorm = 0.0973, lr_0 = 2.8201e-04
Loss = 3.3074e-03, PNorm = 150.2816, GNorm = 0.3528, lr_0 = 2.8182e-04
Loss = 4.9390e-03, PNorm = 150.2895, GNorm = 0.1712, lr_0 = 2.8163e-04
Loss = 4.0340e-03, PNorm = 150.2967, GNorm = 0.1302, lr_0 = 2.8143e-04
Loss = 3.2704e-03, PNorm = 150.3049, GNorm = 0.1243, lr_0 = 2.8124e-04
Loss = 3.4109e-03, PNorm = 150.3133, GNorm = 0.3922, lr_0 = 2.8105e-04
Loss = 3.3944e-03, PNorm = 150.3230, GNorm = 0.1969, lr_0 = 2.8085e-04
Loss = 3.4519e-03, PNorm = 150.3336, GNorm = 0.1736, lr_0 = 2.8066e-04
Loss = 5.4448e-03, PNorm = 150.3420, GNorm = 0.1236, lr_0 = 2.8047e-04
Loss = 2.6170e-03, PNorm = 150.3504, GNorm = 0.1876, lr_0 = 2.8028e-04
Loss = 4.4824e-03, PNorm = 150.3587, GNorm = 0.1288, lr_0 = 2.8009e-04
Loss = 3.2182e-03, PNorm = 150.3678, GNorm = 0.1520, lr_0 = 2.7989e-04
Loss = 3.3565e-03, PNorm = 150.3729, GNorm = 0.0719, lr_0 = 2.7970e-04
Loss = 2.8548e-03, PNorm = 150.3791, GNorm = 0.2383, lr_0 = 2.7951e-04
Loss = 2.7911e-03, PNorm = 150.3879, GNorm = 0.1873, lr_0 = 2.7932e-04
Loss = 3.9085e-03, PNorm = 150.3962, GNorm = 0.2673, lr_0 = 2.7913e-04
Loss = 3.0901e-03, PNorm = 150.4058, GNorm = 0.2108, lr_0 = 2.7894e-04
Loss = 3.3300e-03, PNorm = 150.4134, GNorm = 0.0606, lr_0 = 2.7875e-04
Loss = 3.3187e-03, PNorm = 150.4220, GNorm = 0.3312, lr_0 = 2.7855e-04
Loss = 3.5354e-03, PNorm = 150.4316, GNorm = 0.0823, lr_0 = 2.7836e-04
Loss = 4.5877e-03, PNorm = 150.4392, GNorm = 0.2876, lr_0 = 2.7817e-04
Loss = 6.2104e-03, PNorm = 150.4454, GNorm = 0.2318, lr_0 = 2.7798e-04
Loss = 3.6729e-03, PNorm = 150.4522, GNorm = 0.1822, lr_0 = 2.7779e-04
Loss = 5.2470e-03, PNorm = 150.4617, GNorm = 0.2730, lr_0 = 2.7760e-04
Loss = 6.2007e-03, PNorm = 150.4727, GNorm = 0.1511, lr_0 = 2.7741e-04
Loss = 3.7468e-03, PNorm = 150.4831, GNorm = 0.2375, lr_0 = 2.7722e-04
Loss = 3.0598e-03, PNorm = 150.4947, GNorm = 0.0567, lr_0 = 2.7703e-04
Loss = 2.7915e-03, PNorm = 150.5045, GNorm = 0.0805, lr_0 = 2.7684e-04
Loss = 4.6859e-03, PNorm = 150.5117, GNorm = 0.0855, lr_0 = 2.7665e-04
Loss = 3.9517e-03, PNorm = 150.5218, GNorm = 0.0587, lr_0 = 2.7646e-04
Loss = 3.0046e-03, PNorm = 150.5287, GNorm = 0.0612, lr_0 = 2.7627e-04
Loss = 3.3693e-03, PNorm = 150.5364, GNorm = 0.0531, lr_0 = 2.7608e-04
Loss = 4.3623e-03, PNorm = 150.5414, GNorm = 0.0784, lr_0 = 2.7590e-04
Loss = 4.0756e-03, PNorm = 150.5482, GNorm = 0.1607, lr_0 = 2.7571e-04
Loss = 3.4989e-03, PNorm = 150.5560, GNorm = 0.2137, lr_0 = 2.7552e-04
Loss = 3.1252e-03, PNorm = 150.5639, GNorm = 0.0851, lr_0 = 2.7533e-04
Loss = 3.8777e-03, PNorm = 150.5722, GNorm = 0.1856, lr_0 = 2.7514e-04
Loss = 4.2072e-03, PNorm = 150.5822, GNorm = 0.2048, lr_0 = 2.7495e-04
Loss = 5.0500e-03, PNorm = 150.5904, GNorm = 0.0580, lr_0 = 2.7476e-04
Loss = 3.4298e-03, PNorm = 150.5979, GNorm = 0.0841, lr_0 = 2.7457e-04
Loss = 4.5919e-03, PNorm = 150.6057, GNorm = 0.1315, lr_0 = 2.7439e-04
Loss = 3.1419e-03, PNorm = 150.6123, GNorm = 0.2856, lr_0 = 2.7420e-04
Loss = 3.4816e-03, PNorm = 150.6208, GNorm = 0.0719, lr_0 = 2.7401e-04
Loss = 3.2824e-03, PNorm = 150.6308, GNorm = 0.1653, lr_0 = 2.7382e-04
Loss = 6.5642e-03, PNorm = 150.6382, GNorm = 0.2304, lr_0 = 2.7364e-04
Loss = 7.0093e-03, PNorm = 150.6454, GNorm = 0.3922, lr_0 = 2.7345e-04
Loss = 4.4018e-03, PNorm = 150.6551, GNorm = 0.1252, lr_0 = 2.7326e-04
Loss = 4.7170e-03, PNorm = 150.6636, GNorm = 0.3024, lr_0 = 2.7307e-04
Loss = 3.1140e-03, PNorm = 150.6718, GNorm = 0.0824, lr_0 = 2.7289e-04
Loss = 3.5383e-03, PNorm = 150.6798, GNorm = 0.1556, lr_0 = 2.7270e-04
Loss = 5.0611e-03, PNorm = 150.6878, GNorm = 0.1177, lr_0 = 2.7251e-04
Loss = 4.0349e-03, PNorm = 150.6982, GNorm = 0.2704, lr_0 = 2.7233e-04
Loss = 5.6774e-03, PNorm = 150.7074, GNorm = 0.1665, lr_0 = 2.7214e-04
Loss = 4.3246e-03, PNorm = 150.7178, GNorm = 0.0849, lr_0 = 2.7195e-04
Loss = 4.7649e-03, PNorm = 150.7260, GNorm = 0.1934, lr_0 = 2.7177e-04
Loss = 4.8962e-03, PNorm = 150.7320, GNorm = 0.2782, lr_0 = 2.7158e-04
Loss = 2.8377e-03, PNorm = 150.7415, GNorm = 0.0658, lr_0 = 2.7139e-04
Loss = 4.0470e-03, PNorm = 150.7507, GNorm = 0.3294, lr_0 = 2.7121e-04
Loss = 4.0000e-03, PNorm = 150.7611, GNorm = 0.2409, lr_0 = 2.7102e-04
Loss = 7.4982e-03, PNorm = 150.7685, GNorm = 0.1746, lr_0 = 2.7084e-04
Loss = 3.3236e-03, PNorm = 150.7767, GNorm = 0.1797, lr_0 = 2.7065e-04
Loss = 4.1310e-03, PNorm = 150.7842, GNorm = 0.1506, lr_0 = 2.7047e-04
Loss = 3.7340e-03, PNorm = 150.7898, GNorm = 0.1361, lr_0 = 2.7028e-04
Loss = 3.0428e-03, PNorm = 150.7961, GNorm = 0.1440, lr_0 = 2.7010e-04
Loss = 3.1247e-03, PNorm = 150.8023, GNorm = 0.2935, lr_0 = 2.6991e-04
Loss = 4.6617e-03, PNorm = 150.8108, GNorm = 0.0799, lr_0 = 2.6973e-04
Loss = 4.4403e-03, PNorm = 150.8194, GNorm = 0.2256, lr_0 = 2.6954e-04
Loss = 4.1608e-03, PNorm = 150.8258, GNorm = 0.2123, lr_0 = 2.6936e-04
Loss = 3.7322e-03, PNorm = 150.8353, GNorm = 0.1536, lr_0 = 2.6917e-04
Loss = 3.0840e-03, PNorm = 150.8459, GNorm = 0.1165, lr_0 = 2.6899e-04
Loss = 3.3174e-03, PNorm = 150.8567, GNorm = 0.1537, lr_0 = 2.6880e-04
Loss = 3.0563e-03, PNorm = 150.8644, GNorm = 0.0846, lr_0 = 2.6862e-04
Loss = 3.8197e-03, PNorm = 150.8728, GNorm = 0.2128, lr_0 = 2.6844e-04
Loss = 4.1142e-03, PNorm = 150.8802, GNorm = 0.1206, lr_0 = 2.6825e-04
Validation mae = 0.475593
Epoch 18
Loss = 3.3827e-03, PNorm = 150.8866, GNorm = 0.1361, lr_0 = 2.6807e-04
Loss = 2.6302e-03, PNorm = 150.8916, GNorm = 0.0716, lr_0 = 2.6788e-04
Loss = 3.8673e-03, PNorm = 150.8926, GNorm = 0.0558, lr_0 = 2.6770e-04
Loss = 3.2649e-03, PNorm = 150.8982, GNorm = 0.1123, lr_0 = 2.6752e-04
Loss = 3.2330e-03, PNorm = 150.9048, GNorm = 0.2312, lr_0 = 2.6733e-04
Loss = 3.4109e-03, PNorm = 150.9140, GNorm = 0.2202, lr_0 = 2.6715e-04
Loss = 3.1059e-03, PNorm = 150.9191, GNorm = 0.1375, lr_0 = 2.6697e-04
Loss = 3.1746e-03, PNorm = 150.9236, GNorm = 0.3479, lr_0 = 2.6678e-04
Loss = 3.3047e-03, PNorm = 150.9312, GNorm = 0.1055, lr_0 = 2.6660e-04
Loss = 2.7592e-03, PNorm = 150.9358, GNorm = 0.3285, lr_0 = 2.6642e-04
Loss = 3.2044e-03, PNorm = 150.9412, GNorm = 0.2151, lr_0 = 2.6624e-04
Loss = 3.3395e-03, PNorm = 150.9463, GNorm = 0.2276, lr_0 = 2.6605e-04
Loss = 4.5234e-03, PNorm = 150.9519, GNorm = 0.2331, lr_0 = 2.6587e-04
Loss = 3.0770e-03, PNorm = 150.9603, GNorm = 0.2221, lr_0 = 2.6569e-04
Loss = 2.7724e-03, PNorm = 150.9691, GNorm = 0.2751, lr_0 = 2.6551e-04
Loss = 3.0561e-03, PNorm = 150.9758, GNorm = 0.1599, lr_0 = 2.6533e-04
Loss = 3.3277e-03, PNorm = 150.9813, GNorm = 0.1101, lr_0 = 2.6514e-04
Loss = 2.8613e-03, PNorm = 150.9889, GNorm = 0.0718, lr_0 = 2.6496e-04
Loss = 3.5094e-03, PNorm = 150.9945, GNorm = 0.4067, lr_0 = 2.6478e-04
Loss = 5.7766e-03, PNorm = 151.0001, GNorm = 0.1762, lr_0 = 2.6460e-04
Loss = 4.4925e-03, PNorm = 151.0060, GNorm = 0.0686, lr_0 = 2.6442e-04
Loss = 2.7642e-03, PNorm = 151.0127, GNorm = 0.1433, lr_0 = 2.6424e-04
Loss = 3.3499e-03, PNorm = 151.0184, GNorm = 0.1024, lr_0 = 2.6406e-04
Loss = 3.2811e-03, PNorm = 151.0229, GNorm = 0.1826, lr_0 = 2.6388e-04
Loss = 3.6603e-03, PNorm = 151.0296, GNorm = 0.1440, lr_0 = 2.6369e-04
Loss = 5.3673e-03, PNorm = 151.0408, GNorm = 0.2335, lr_0 = 2.6351e-04
Loss = 3.5846e-03, PNorm = 151.0479, GNorm = 0.0545, lr_0 = 2.6333e-04
Loss = 3.5841e-03, PNorm = 151.0558, GNorm = 0.3025, lr_0 = 2.6315e-04
Loss = 2.7485e-03, PNorm = 151.0614, GNorm = 0.1457, lr_0 = 2.6297e-04
Loss = 2.4038e-03, PNorm = 151.0663, GNorm = 0.1295, lr_0 = 2.6279e-04
Loss = 3.7620e-03, PNorm = 151.0714, GNorm = 0.1341, lr_0 = 2.6261e-04
Loss = 3.7379e-03, PNorm = 151.0776, GNorm = 0.2070, lr_0 = 2.6243e-04
Loss = 3.8691e-03, PNorm = 151.0860, GNorm = 0.0524, lr_0 = 2.6225e-04
Loss = 3.2673e-03, PNorm = 151.0949, GNorm = 0.2381, lr_0 = 2.6207e-04
Loss = 2.7221e-03, PNorm = 151.1009, GNorm = 0.1465, lr_0 = 2.6189e-04
Loss = 2.6902e-03, PNorm = 151.1080, GNorm = 0.1918, lr_0 = 2.6171e-04
Loss = 2.8146e-03, PNorm = 151.1173, GNorm = 0.2266, lr_0 = 2.6153e-04
Loss = 2.7519e-03, PNorm = 151.1238, GNorm = 0.1348, lr_0 = 2.6136e-04
Loss = 3.5724e-03, PNorm = 151.1314, GNorm = 0.0695, lr_0 = 2.6118e-04
Loss = 2.5460e-03, PNorm = 151.1379, GNorm = 0.1764, lr_0 = 2.6100e-04
Loss = 2.5473e-03, PNorm = 151.1438, GNorm = 0.2371, lr_0 = 2.6082e-04
Loss = 3.8630e-03, PNorm = 151.1497, GNorm = 0.1416, lr_0 = 2.6064e-04
Loss = 4.1709e-03, PNorm = 151.1578, GNorm = 0.1532, lr_0 = 2.6046e-04
Loss = 3.0665e-03, PNorm = 151.1663, GNorm = 0.3710, lr_0 = 2.6028e-04
Loss = 2.4038e-03, PNorm = 151.1725, GNorm = 0.0786, lr_0 = 2.6011e-04
Loss = 6.1475e-03, PNorm = 151.1770, GNorm = 0.2735, lr_0 = 2.5993e-04
Loss = 4.3486e-03, PNorm = 151.1829, GNorm = 0.1251, lr_0 = 2.5975e-04
Loss = 2.3062e-03, PNorm = 151.1908, GNorm = 0.1289, lr_0 = 2.5957e-04
Loss = 3.7482e-03, PNorm = 151.1993, GNorm = 0.1429, lr_0 = 2.5939e-04
Loss = 2.6915e-03, PNorm = 151.2051, GNorm = 0.2302, lr_0 = 2.5922e-04
Loss = 2.8042e-03, PNorm = 151.2121, GNorm = 0.0683, lr_0 = 2.5904e-04
Loss = 2.8315e-03, PNorm = 151.2210, GNorm = 0.1777, lr_0 = 2.5886e-04
Loss = 2.7871e-03, PNorm = 151.2247, GNorm = 0.2371, lr_0 = 2.5868e-04
Loss = 2.7385e-03, PNorm = 151.2285, GNorm = 0.0987, lr_0 = 2.5851e-04
Loss = 3.0709e-03, PNorm = 151.2355, GNorm = 0.3144, lr_0 = 2.5833e-04
Loss = 3.2733e-03, PNorm = 151.2444, GNorm = 0.2010, lr_0 = 2.5815e-04
Loss = 3.2670e-03, PNorm = 151.2504, GNorm = 0.2497, lr_0 = 2.5797e-04
Loss = 3.3998e-03, PNorm = 151.2566, GNorm = 0.4810, lr_0 = 2.5780e-04
Loss = 4.8055e-03, PNorm = 151.2633, GNorm = 0.1113, lr_0 = 2.5762e-04
Loss = 3.4426e-03, PNorm = 151.2701, GNorm = 0.1700, lr_0 = 2.5745e-04
Loss = 2.7152e-03, PNorm = 151.2774, GNorm = 0.1243, lr_0 = 2.5727e-04
Loss = 4.7544e-03, PNorm = 151.2817, GNorm = 0.1242, lr_0 = 2.5709e-04
Loss = 2.6319e-03, PNorm = 151.2874, GNorm = 0.1407, lr_0 = 2.5692e-04
Loss = 4.1452e-03, PNorm = 151.2904, GNorm = 0.1013, lr_0 = 2.5674e-04
Loss = 3.0837e-03, PNorm = 151.2941, GNorm = 0.1675, lr_0 = 2.5656e-04
Loss = 3.2235e-03, PNorm = 151.3016, GNorm = 0.1338, lr_0 = 2.5639e-04
Loss = 3.6781e-03, PNorm = 151.3113, GNorm = 0.0919, lr_0 = 2.5621e-04
Loss = 4.6781e-03, PNorm = 151.3190, GNorm = 0.1003, lr_0 = 2.5604e-04
Loss = 3.5077e-03, PNorm = 151.3239, GNorm = 0.2826, lr_0 = 2.5586e-04
Loss = 2.9213e-03, PNorm = 151.3322, GNorm = 0.1234, lr_0 = 2.5569e-04
Loss = 4.6650e-03, PNorm = 151.3431, GNorm = 0.0944, lr_0 = 2.5551e-04
Loss = 2.7183e-03, PNorm = 151.3477, GNorm = 0.1164, lr_0 = 2.5534e-04
Loss = 6.3027e-03, PNorm = 151.3552, GNorm = 0.2389, lr_0 = 2.5516e-04
Loss = 3.6293e-03, PNorm = 151.3634, GNorm = 0.2671, lr_0 = 2.5499e-04
Loss = 2.3404e-03, PNorm = 151.3702, GNorm = 0.0732, lr_0 = 2.5481e-04
Loss = 3.9728e-03, PNorm = 151.3769, GNorm = 0.3496, lr_0 = 2.5464e-04
Loss = 3.1980e-03, PNorm = 151.3844, GNorm = 0.1950, lr_0 = 2.5446e-04
Loss = 3.4916e-03, PNorm = 151.3918, GNorm = 0.1968, lr_0 = 2.5429e-04
Loss = 3.0594e-03, PNorm = 151.3989, GNorm = 0.1395, lr_0 = 2.5411e-04
Loss = 3.8492e-03, PNorm = 151.4053, GNorm = 0.1736, lr_0 = 2.5394e-04
Loss = 3.7182e-03, PNorm = 151.4124, GNorm = 0.4117, lr_0 = 2.5377e-04
Loss = 4.3970e-03, PNorm = 151.4186, GNorm = 0.1491, lr_0 = 2.5359e-04
Loss = 2.3842e-03, PNorm = 151.4266, GNorm = 0.0866, lr_0 = 2.5342e-04
Loss = 3.0540e-03, PNorm = 151.4320, GNorm = 0.2937, lr_0 = 2.5325e-04
Loss = 3.5842e-03, PNorm = 151.4361, GNorm = 0.1086, lr_0 = 2.5307e-04
Loss = 3.7240e-03, PNorm = 151.4452, GNorm = 0.0961, lr_0 = 2.5290e-04
Loss = 4.3714e-03, PNorm = 151.4549, GNorm = 0.2421, lr_0 = 2.5273e-04
Loss = 4.2963e-03, PNorm = 151.4664, GNorm = 0.1485, lr_0 = 2.5255e-04
Loss = 2.8164e-03, PNorm = 151.4745, GNorm = 0.0507, lr_0 = 2.5238e-04
Loss = 3.3235e-03, PNorm = 151.4818, GNorm = 0.1878, lr_0 = 2.5221e-04
Loss = 4.8574e-03, PNorm = 151.4889, GNorm = 0.1212, lr_0 = 2.5203e-04
Loss = 3.1278e-03, PNorm = 151.4961, GNorm = 0.1718, lr_0 = 2.5186e-04
Loss = 5.4623e-03, PNorm = 151.5062, GNorm = 0.2775, lr_0 = 2.5169e-04
Loss = 5.7672e-03, PNorm = 151.5130, GNorm = 0.3993, lr_0 = 2.5152e-04
Loss = 4.3161e-03, PNorm = 151.5175, GNorm = 0.2248, lr_0 = 2.5134e-04
Loss = 4.0329e-03, PNorm = 151.5256, GNorm = 0.1103, lr_0 = 2.5117e-04
Loss = 3.1880e-03, PNorm = 151.5328, GNorm = 0.2666, lr_0 = 2.5100e-04
Loss = 2.8341e-03, PNorm = 151.5412, GNorm = 0.3105, lr_0 = 2.5083e-04
Loss = 4.5961e-03, PNorm = 151.5527, GNorm = 0.1586, lr_0 = 2.5066e-04
Loss = 3.1715e-03, PNorm = 151.5613, GNorm = 0.2075, lr_0 = 2.5048e-04
Loss = 3.7446e-03, PNorm = 151.5710, GNorm = 0.1472, lr_0 = 2.5031e-04
Loss = 2.7444e-03, PNorm = 151.5743, GNorm = 0.3666, lr_0 = 2.5014e-04
Loss = 3.4573e-03, PNorm = 151.5814, GNorm = 0.1916, lr_0 = 2.4997e-04
Loss = 3.3252e-03, PNorm = 151.5914, GNorm = 0.2208, lr_0 = 2.4980e-04
Loss = 3.4113e-03, PNorm = 151.6017, GNorm = 0.0454, lr_0 = 2.4963e-04
Loss = 7.2751e-03, PNorm = 151.6092, GNorm = 0.2891, lr_0 = 2.4946e-04
Loss = 3.9411e-03, PNorm = 151.6191, GNorm = 0.1749, lr_0 = 2.4929e-04
Loss = 3.3489e-03, PNorm = 151.6306, GNorm = 0.2176, lr_0 = 2.4911e-04
Loss = 4.9902e-03, PNorm = 151.6407, GNorm = 0.1627, lr_0 = 2.4894e-04
Loss = 3.0195e-03, PNorm = 151.6514, GNorm = 0.1587, lr_0 = 2.4877e-04
Loss = 4.1016e-03, PNorm = 151.6588, GNorm = 0.1860, lr_0 = 2.4860e-04
Loss = 3.4006e-03, PNorm = 151.6621, GNorm = 0.0671, lr_0 = 2.4843e-04
Loss = 2.6539e-03, PNorm = 151.6702, GNorm = 0.3919, lr_0 = 2.4826e-04
Loss = 3.2327e-03, PNorm = 151.6781, GNorm = 0.1223, lr_0 = 2.4809e-04
Loss = 2.6964e-03, PNorm = 151.6860, GNorm = 0.1871, lr_0 = 2.4792e-04
Loss = 3.0604e-03, PNorm = 151.6955, GNorm = 0.1556, lr_0 = 2.4775e-04
Loss = 2.8124e-03, PNorm = 151.7015, GNorm = 0.1461, lr_0 = 2.4758e-04
Loss = 2.1237e-03, PNorm = 151.7076, GNorm = 0.0482, lr_0 = 2.4741e-04
Loss = 3.0243e-03, PNorm = 151.7130, GNorm = 0.0957, lr_0 = 2.4724e-04
Loss = 2.6775e-03, PNorm = 151.7190, GNorm = 0.1354, lr_0 = 2.4707e-04
Validation mae = 0.474908
Epoch 19
Loss = 3.2173e-03, PNorm = 151.7245, GNorm = 0.3201, lr_0 = 2.4690e-04
Loss = 3.1933e-03, PNorm = 151.7272, GNorm = 0.1966, lr_0 = 2.4674e-04
Loss = 3.6994e-03, PNorm = 151.7293, GNorm = 0.0882, lr_0 = 2.4657e-04
Loss = 3.4242e-03, PNorm = 151.7334, GNorm = 0.1086, lr_0 = 2.4640e-04
Loss = 3.3856e-03, PNorm = 151.7382, GNorm = 0.4084, lr_0 = 2.4623e-04
Loss = 2.6788e-03, PNorm = 151.7420, GNorm = 0.2552, lr_0 = 2.4606e-04
Loss = 4.5614e-03, PNorm = 151.7455, GNorm = 0.0927, lr_0 = 2.4589e-04
Loss = 3.5921e-03, PNorm = 151.7484, GNorm = 0.0501, lr_0 = 2.4572e-04
Loss = 2.7791e-03, PNorm = 151.7531, GNorm = 0.1341, lr_0 = 2.4556e-04
Loss = 3.0020e-03, PNorm = 151.7589, GNorm = 0.1771, lr_0 = 2.4539e-04
Loss = 2.5009e-03, PNorm = 151.7639, GNorm = 0.1382, lr_0 = 2.4522e-04
Loss = 2.5228e-03, PNorm = 151.7671, GNorm = 0.2258, lr_0 = 2.4505e-04
Loss = 2.7589e-03, PNorm = 151.7727, GNorm = 0.1070, lr_0 = 2.4488e-04
Loss = 3.8467e-03, PNorm = 151.7755, GNorm = 0.3503, lr_0 = 2.4472e-04
Loss = 3.1493e-03, PNorm = 151.7795, GNorm = 0.1477, lr_0 = 2.4455e-04
Loss = 2.3774e-03, PNorm = 151.7856, GNorm = 0.2454, lr_0 = 2.4438e-04
Loss = 3.4775e-03, PNorm = 151.7875, GNorm = 0.4078, lr_0 = 2.4421e-04
Loss = 4.1552e-03, PNorm = 151.7933, GNorm = 0.2458, lr_0 = 2.4405e-04
Loss = 2.3151e-03, PNorm = 151.7995, GNorm = 0.1307, lr_0 = 2.4388e-04
Loss = 3.5323e-03, PNorm = 151.8044, GNorm = 0.0959, lr_0 = 2.4371e-04
Loss = 3.0962e-03, PNorm = 151.8090, GNorm = 0.0503, lr_0 = 2.4354e-04
Loss = 3.0642e-03, PNorm = 151.8154, GNorm = 0.2914, lr_0 = 2.4338e-04
Loss = 2.6247e-03, PNorm = 151.8223, GNorm = 0.1368, lr_0 = 2.4321e-04
Loss = 3.3422e-03, PNorm = 151.8247, GNorm = 0.0746, lr_0 = 2.4304e-04
Loss = 2.4469e-03, PNorm = 151.8286, GNorm = 0.1728, lr_0 = 2.4288e-04
Loss = 2.2256e-03, PNorm = 151.8359, GNorm = 0.1326, lr_0 = 2.4271e-04
Loss = 2.8587e-03, PNorm = 151.8408, GNorm = 0.1411, lr_0 = 2.4254e-04
Loss = 2.1607e-03, PNorm = 151.8458, GNorm = 0.0957, lr_0 = 2.4238e-04
Loss = 2.7050e-03, PNorm = 151.8531, GNorm = 0.0690, lr_0 = 2.4221e-04
Loss = 3.7732e-03, PNorm = 151.8610, GNorm = 0.1296, lr_0 = 2.4205e-04
Loss = 3.0834e-03, PNorm = 151.8672, GNorm = 0.0652, lr_0 = 2.4188e-04
Loss = 3.0005e-03, PNorm = 151.8708, GNorm = 0.0810, lr_0 = 2.4171e-04
Loss = 2.4984e-03, PNorm = 151.8762, GNorm = 0.0907, lr_0 = 2.4155e-04
Loss = 3.4717e-03, PNorm = 151.8837, GNorm = 0.0988, lr_0 = 2.4138e-04
Loss = 2.3096e-03, PNorm = 151.8899, GNorm = 0.2104, lr_0 = 2.4122e-04
Loss = 2.7114e-03, PNorm = 151.8958, GNorm = 0.2004, lr_0 = 2.4105e-04
Loss = 4.8763e-03, PNorm = 151.8982, GNorm = 0.1523, lr_0 = 2.4089e-04
Loss = 2.6058e-03, PNorm = 151.9019, GNorm = 0.2844, lr_0 = 2.4072e-04
Loss = 3.8832e-03, PNorm = 151.9052, GNorm = 0.0930, lr_0 = 2.4056e-04
Loss = 2.0005e-03, PNorm = 151.9091, GNorm = 0.1507, lr_0 = 2.4039e-04
Loss = 4.8372e-03, PNorm = 151.9148, GNorm = 0.1305, lr_0 = 2.4023e-04
Loss = 2.0112e-03, PNorm = 151.9204, GNorm = 0.0972, lr_0 = 2.4006e-04
Loss = 3.4034e-03, PNorm = 151.9242, GNorm = 0.2242, lr_0 = 2.3990e-04
Loss = 2.1928e-03, PNorm = 151.9267, GNorm = 0.0850, lr_0 = 2.3974e-04
Loss = 2.9365e-03, PNorm = 151.9305, GNorm = 0.1668, lr_0 = 2.3957e-04
Loss = 3.0672e-03, PNorm = 151.9373, GNorm = 0.3025, lr_0 = 2.3941e-04
Loss = 2.8232e-03, PNorm = 151.9440, GNorm = 0.1644, lr_0 = 2.3924e-04
Loss = 2.4388e-03, PNorm = 151.9479, GNorm = 0.1486, lr_0 = 2.3908e-04
Loss = 2.4330e-03, PNorm = 151.9537, GNorm = 0.2364, lr_0 = 2.3892e-04
Loss = 2.6544e-03, PNorm = 151.9589, GNorm = 0.0568, lr_0 = 2.3875e-04
Loss = 5.4339e-03, PNorm = 151.9625, GNorm = 0.1573, lr_0 = 2.3859e-04
Loss = 2.0878e-03, PNorm = 151.9666, GNorm = 0.2041, lr_0 = 2.3842e-04
Loss = 2.2506e-03, PNorm = 151.9739, GNorm = 0.2223, lr_0 = 2.3826e-04
Loss = 2.0898e-03, PNorm = 151.9799, GNorm = 0.1904, lr_0 = 2.3810e-04
Loss = 2.5730e-03, PNorm = 151.9867, GNorm = 0.1424, lr_0 = 2.3794e-04
Loss = 3.8029e-03, PNorm = 151.9908, GNorm = 0.0850, lr_0 = 2.3777e-04
Loss = 5.6354e-03, PNorm = 151.9969, GNorm = 0.2643, lr_0 = 2.3761e-04
Loss = 3.5593e-03, PNorm = 152.0034, GNorm = 0.3201, lr_0 = 2.3745e-04
Loss = 4.4482e-03, PNorm = 152.0109, GNorm = 0.3441, lr_0 = 2.3728e-04
Loss = 2.9796e-03, PNorm = 152.0200, GNorm = 0.0554, lr_0 = 2.3712e-04
Loss = 3.3005e-03, PNorm = 152.0246, GNorm = 0.1936, lr_0 = 2.3696e-04
Loss = 2.3361e-03, PNorm = 152.0293, GNorm = 0.1506, lr_0 = 2.3680e-04
Loss = 2.8498e-03, PNorm = 152.0346, GNorm = 0.1497, lr_0 = 2.3663e-04
Loss = 3.9373e-03, PNorm = 152.0401, GNorm = 0.1576, lr_0 = 2.3647e-04
Loss = 4.0702e-03, PNorm = 152.0449, GNorm = 0.2924, lr_0 = 2.3631e-04
Loss = 2.4403e-03, PNorm = 152.0490, GNorm = 0.1973, lr_0 = 2.3615e-04
Loss = 2.3373e-03, PNorm = 152.0544, GNorm = 0.0987, lr_0 = 2.3599e-04
Loss = 2.9609e-03, PNorm = 152.0604, GNorm = 0.1923, lr_0 = 2.3582e-04
Loss = 3.4479e-03, PNorm = 152.0658, GNorm = 0.1240, lr_0 = 2.3566e-04
Loss = 3.0837e-03, PNorm = 152.0715, GNorm = 0.1658, lr_0 = 2.3550e-04
Loss = 2.2348e-03, PNorm = 152.0796, GNorm = 0.1684, lr_0 = 2.3534e-04
Loss = 2.6409e-03, PNorm = 152.0870, GNorm = 0.0594, lr_0 = 2.3518e-04
Loss = 2.2604e-03, PNorm = 152.0936, GNorm = 0.0630, lr_0 = 2.3502e-04
Loss = 2.9845e-03, PNorm = 152.1004, GNorm = 0.1736, lr_0 = 2.3486e-04
Loss = 3.7885e-03, PNorm = 152.1067, GNorm = 0.2117, lr_0 = 2.3470e-04
Loss = 3.0008e-03, PNorm = 152.1106, GNorm = 0.0959, lr_0 = 2.3454e-04
Loss = 1.9689e-03, PNorm = 152.1143, GNorm = 0.2643, lr_0 = 2.3437e-04
Loss = 3.6443e-03, PNorm = 152.1200, GNorm = 0.2101, lr_0 = 2.3421e-04
Loss = 2.2621e-03, PNorm = 152.1257, GNorm = 0.2120, lr_0 = 2.3405e-04
Loss = 2.8656e-03, PNorm = 152.1328, GNorm = 0.1511, lr_0 = 2.3389e-04
Loss = 2.9110e-03, PNorm = 152.1422, GNorm = 0.2899, lr_0 = 2.3373e-04
Loss = 4.1591e-03, PNorm = 152.1480, GNorm = 0.4717, lr_0 = 2.3357e-04
Loss = 3.0317e-03, PNorm = 152.1504, GNorm = 0.0749, lr_0 = 2.3341e-04
Loss = 2.7668e-03, PNorm = 152.1552, GNorm = 0.0700, lr_0 = 2.3325e-04
Loss = 3.0660e-03, PNorm = 152.1634, GNorm = 0.1139, lr_0 = 2.3309e-04
Loss = 2.1255e-03, PNorm = 152.1714, GNorm = 0.1221, lr_0 = 2.3293e-04
Loss = 3.9732e-03, PNorm = 152.1776, GNorm = 0.3268, lr_0 = 2.3277e-04
Loss = 2.0623e-03, PNorm = 152.1812, GNorm = 0.1615, lr_0 = 2.3261e-04
Loss = 2.3615e-03, PNorm = 152.1858, GNorm = 0.2808, lr_0 = 2.3246e-04
Loss = 2.6806e-03, PNorm = 152.1892, GNorm = 0.1490, lr_0 = 2.3230e-04
Loss = 3.0452e-03, PNorm = 152.1939, GNorm = 0.2393, lr_0 = 2.3214e-04
Loss = 2.1370e-03, PNorm = 152.1992, GNorm = 0.2034, lr_0 = 2.3198e-04
Loss = 3.8295e-03, PNorm = 152.2064, GNorm = 0.4962, lr_0 = 2.3182e-04
Loss = 2.4791e-03, PNorm = 152.2122, GNorm = 0.2294, lr_0 = 2.3166e-04
Loss = 2.4454e-03, PNorm = 152.2198, GNorm = 0.2993, lr_0 = 2.3150e-04
Loss = 1.9081e-03, PNorm = 152.2296, GNorm = 0.0512, lr_0 = 2.3134e-04
Loss = 3.2561e-03, PNorm = 152.2376, GNorm = 0.0592, lr_0 = 2.3118e-04
Loss = 3.3822e-03, PNorm = 152.2435, GNorm = 0.1288, lr_0 = 2.3103e-04
Loss = 5.4036e-03, PNorm = 152.2443, GNorm = 0.2887, lr_0 = 2.3087e-04
Loss = 2.7738e-03, PNorm = 152.2493, GNorm = 0.2730, lr_0 = 2.3071e-04
Loss = 2.5458e-03, PNorm = 152.2507, GNorm = 0.1173, lr_0 = 2.3055e-04
Loss = 6.5779e-03, PNorm = 152.2559, GNorm = 0.4184, lr_0 = 2.3039e-04
Loss = 4.0776e-03, PNorm = 152.2631, GNorm = 0.1148, lr_0 = 2.3024e-04
Loss = 3.6935e-03, PNorm = 152.2697, GNorm = 0.2472, lr_0 = 2.3008e-04
Loss = 2.9277e-03, PNorm = 152.2808, GNorm = 0.2747, lr_0 = 2.2992e-04
Loss = 2.8530e-03, PNorm = 152.2897, GNorm = 0.0672, lr_0 = 2.2976e-04
Loss = 2.9120e-03, PNorm = 152.2953, GNorm = 0.1188, lr_0 = 2.2961e-04
Loss = 2.3662e-03, PNorm = 152.3023, GNorm = 0.3280, lr_0 = 2.2945e-04
Loss = 3.8909e-03, PNorm = 152.3083, GNorm = 0.0745, lr_0 = 2.2929e-04
Loss = 2.1194e-03, PNorm = 152.3150, GNorm = 0.2176, lr_0 = 2.2913e-04
Loss = 2.6577e-03, PNorm = 152.3199, GNorm = 0.0867, lr_0 = 2.2898e-04
Loss = 2.8787e-03, PNorm = 152.3245, GNorm = 0.0923, lr_0 = 2.2882e-04
Loss = 2.5354e-03, PNorm = 152.3321, GNorm = 0.1540, lr_0 = 2.2866e-04
Loss = 3.6095e-03, PNorm = 152.3368, GNorm = 0.1080, lr_0 = 2.2851e-04
Loss = 2.3391e-03, PNorm = 152.3436, GNorm = 0.0628, lr_0 = 2.2835e-04
Loss = 3.6587e-03, PNorm = 152.3509, GNorm = 0.0918, lr_0 = 2.2819e-04
Loss = 2.0431e-03, PNorm = 152.3596, GNorm = 0.1672, lr_0 = 2.2804e-04
Loss = 2.3979e-03, PNorm = 152.3654, GNorm = 0.1548, lr_0 = 2.2788e-04
Loss = 2.7392e-03, PNorm = 152.3739, GNorm = 0.0891, lr_0 = 2.2773e-04
Loss = 2.5316e-03, PNorm = 152.3802, GNorm = 0.0410, lr_0 = 2.2757e-04
Validation mae = 0.475961
Epoch 20
Loss = 3.1466e-03, PNorm = 152.3836, GNorm = 0.3714, lr_0 = 2.2741e-04
Loss = 2.5975e-03, PNorm = 152.3866, GNorm = 0.0604, lr_0 = 2.2726e-04
Loss = 1.9060e-03, PNorm = 152.3911, GNorm = 0.1161, lr_0 = 2.2710e-04
Loss = 2.0035e-03, PNorm = 152.3932, GNorm = 0.1363, lr_0 = 2.2695e-04
Loss = 2.1131e-03, PNorm = 152.3961, GNorm = 0.0507, lr_0 = 2.2679e-04
Loss = 1.8944e-03, PNorm = 152.4001, GNorm = 0.0760, lr_0 = 2.2664e-04
Loss = 1.9329e-03, PNorm = 152.4023, GNorm = 0.1608, lr_0 = 2.2648e-04
Loss = 3.0022e-03, PNorm = 152.4067, GNorm = 0.3900, lr_0 = 2.2632e-04
Loss = 2.0397e-03, PNorm = 152.4106, GNorm = 0.0960, lr_0 = 2.2617e-04
Loss = 1.8466e-03, PNorm = 152.4144, GNorm = 0.2095, lr_0 = 2.2601e-04
Loss = 2.1334e-03, PNorm = 152.4194, GNorm = 0.2793, lr_0 = 2.2586e-04
Loss = 2.0883e-03, PNorm = 152.4231, GNorm = 0.1801, lr_0 = 2.2571e-04
Loss = 2.7802e-03, PNorm = 152.4248, GNorm = 0.1215, lr_0 = 2.2555e-04
Loss = 1.8720e-03, PNorm = 152.4289, GNorm = 0.2008, lr_0 = 2.2540e-04
Loss = 1.7743e-03, PNorm = 152.4313, GNorm = 0.1801, lr_0 = 2.2524e-04
Loss = 2.0890e-03, PNorm = 152.4357, GNorm = 0.0797, lr_0 = 2.2509e-04
Loss = 2.4079e-03, PNorm = 152.4414, GNorm = 0.2332, lr_0 = 2.2493e-04
Loss = 2.7147e-03, PNorm = 152.4476, GNorm = 0.2864, lr_0 = 2.2478e-04
Loss = 2.2503e-03, PNorm = 152.4542, GNorm = 0.0697, lr_0 = 2.2463e-04
Loss = 1.7391e-03, PNorm = 152.4573, GNorm = 0.2360, lr_0 = 2.2447e-04
Loss = 1.7777e-03, PNorm = 152.4578, GNorm = 0.1368, lr_0 = 2.2432e-04
Loss = 1.7646e-03, PNorm = 152.4621, GNorm = 0.0395, lr_0 = 2.2416e-04
Loss = 2.9922e-03, PNorm = 152.4657, GNorm = 0.0453, lr_0 = 2.2401e-04
Loss = 2.4406e-03, PNorm = 152.4722, GNorm = 0.0736, lr_0 = 2.2386e-04
Loss = 2.4601e-03, PNorm = 152.4748, GNorm = 0.1810, lr_0 = 2.2370e-04
Loss = 3.3459e-03, PNorm = 152.4794, GNorm = 0.0968, lr_0 = 2.2355e-04
Loss = 1.9097e-03, PNorm = 152.4885, GNorm = 0.0983, lr_0 = 2.2340e-04
Loss = 2.6628e-03, PNorm = 152.4919, GNorm = 0.0485, lr_0 = 2.2324e-04
Loss = 2.8697e-03, PNorm = 152.4976, GNorm = 0.1204, lr_0 = 2.2309e-04
Loss = 4.4852e-03, PNorm = 152.5022, GNorm = 0.1838, lr_0 = 2.2294e-04
Loss = 2.9464e-03, PNorm = 152.5055, GNorm = 0.2066, lr_0 = 2.2279e-04
Loss = 2.2909e-03, PNorm = 152.5073, GNorm = 0.1295, lr_0 = 2.2263e-04
Loss = 2.7752e-03, PNorm = 152.5120, GNorm = 0.1262, lr_0 = 2.2248e-04
Loss = 3.6555e-03, PNorm = 152.5160, GNorm = 0.1278, lr_0 = 2.2233e-04
Loss = 2.4866e-03, PNorm = 152.5235, GNorm = 0.0788, lr_0 = 2.2218e-04
Loss = 2.3486e-03, PNorm = 152.5323, GNorm = 0.2097, lr_0 = 2.2202e-04
Loss = 2.6772e-03, PNorm = 152.5375, GNorm = 0.1734, lr_0 = 2.2187e-04
Loss = 1.7522e-03, PNorm = 152.5448, GNorm = 0.1410, lr_0 = 2.2172e-04
Loss = 1.9134e-03, PNorm = 152.5483, GNorm = 0.2480, lr_0 = 2.2157e-04
Loss = 2.2926e-03, PNorm = 152.5515, GNorm = 0.0556, lr_0 = 2.2142e-04
Loss = 2.0117e-03, PNorm = 152.5550, GNorm = 0.1557, lr_0 = 2.2126e-04
Loss = 2.6715e-03, PNorm = 152.5581, GNorm = 0.0631, lr_0 = 2.2111e-04
Loss = 1.8338e-03, PNorm = 152.5607, GNorm = 0.1028, lr_0 = 2.2096e-04
Loss = 2.4127e-03, PNorm = 152.5667, GNorm = 0.1828, lr_0 = 2.2081e-04
Loss = 2.1413e-03, PNorm = 152.5687, GNorm = 0.2345, lr_0 = 2.2066e-04
Loss = 3.1145e-03, PNorm = 152.5728, GNorm = 0.2792, lr_0 = 2.2051e-04
Loss = 2.5445e-03, PNorm = 152.5770, GNorm = 0.0595, lr_0 = 2.2036e-04
Loss = 1.9839e-03, PNorm = 152.5826, GNorm = 0.0864, lr_0 = 2.2021e-04
Loss = 2.3793e-03, PNorm = 152.5895, GNorm = 0.1224, lr_0 = 2.2005e-04
Loss = 1.8467e-03, PNorm = 152.5934, GNorm = 0.1872, lr_0 = 2.1990e-04
Loss = 2.3064e-03, PNorm = 152.5983, GNorm = 0.1208, lr_0 = 2.1975e-04
Loss = 3.5006e-03, PNorm = 152.6001, GNorm = 0.1255, lr_0 = 2.1960e-04
Loss = 2.6789e-03, PNorm = 152.6029, GNorm = 0.1446, lr_0 = 2.1945e-04
Loss = 3.2147e-03, PNorm = 152.6046, GNorm = 0.0437, lr_0 = 2.1930e-04
Loss = 2.2265e-03, PNorm = 152.6098, GNorm = 0.0886, lr_0 = 2.1915e-04
Loss = 1.7479e-03, PNorm = 152.6145, GNorm = 0.0954, lr_0 = 2.1900e-04
Loss = 2.6124e-03, PNorm = 152.6190, GNorm = 0.1796, lr_0 = 2.1885e-04
Loss = 2.2183e-03, PNorm = 152.6261, GNorm = 0.2004, lr_0 = 2.1870e-04
Loss = 4.0295e-03, PNorm = 152.6310, GNorm = 0.1397, lr_0 = 2.1855e-04
Loss = 1.8762e-03, PNorm = 152.6373, GNorm = 0.0619, lr_0 = 2.1840e-04
Loss = 1.7900e-03, PNorm = 152.6423, GNorm = 0.1282, lr_0 = 2.1825e-04
Loss = 2.2877e-03, PNorm = 152.6467, GNorm = 0.1438, lr_0 = 2.1810e-04
Loss = 2.6044e-03, PNorm = 152.6481, GNorm = 0.0700, lr_0 = 2.1795e-04
Loss = 4.2773e-03, PNorm = 152.6506, GNorm = 0.0630, lr_0 = 2.1780e-04
Loss = 3.2227e-03, PNorm = 152.6542, GNorm = 0.2264, lr_0 = 2.1765e-04
Loss = 2.8381e-03, PNorm = 152.6589, GNorm = 0.0575, lr_0 = 2.1751e-04
Loss = 2.8231e-03, PNorm = 152.6646, GNorm = 0.3030, lr_0 = 2.1736e-04
Loss = 2.1978e-03, PNorm = 152.6699, GNorm = 0.1241, lr_0 = 2.1721e-04
Loss = 2.7002e-03, PNorm = 152.6752, GNorm = 0.1120, lr_0 = 2.1706e-04
Loss = 2.2300e-03, PNorm = 152.6817, GNorm = 0.2744, lr_0 = 2.1691e-04
Loss = 2.0933e-03, PNorm = 152.6883, GNorm = 0.0785, lr_0 = 2.1676e-04
Loss = 2.8735e-03, PNorm = 152.6943, GNorm = 0.0624, lr_0 = 2.1661e-04
Loss = 5.2711e-03, PNorm = 152.7008, GNorm = 0.0679, lr_0 = 2.1646e-04
Loss = 1.8463e-03, PNorm = 152.7033, GNorm = 0.1779, lr_0 = 2.1632e-04
Loss = 2.4824e-03, PNorm = 152.7073, GNorm = 0.1501, lr_0 = 2.1617e-04
Loss = 2.2770e-03, PNorm = 152.7150, GNorm = 0.2229, lr_0 = 2.1602e-04
Loss = 3.8290e-03, PNorm = 152.7185, GNorm = 0.0556, lr_0 = 2.1587e-04
Loss = 2.2976e-03, PNorm = 152.7241, GNorm = 0.0889, lr_0 = 2.1572e-04
Loss = 3.2036e-03, PNorm = 152.7316, GNorm = 0.2404, lr_0 = 2.1558e-04
Loss = 2.0977e-03, PNorm = 152.7397, GNorm = 0.3031, lr_0 = 2.1543e-04
Loss = 2.7051e-03, PNorm = 152.7450, GNorm = 0.0796, lr_0 = 2.1528e-04
Loss = 2.6369e-03, PNorm = 152.7528, GNorm = 0.2394, lr_0 = 2.1513e-04
Loss = 2.9323e-03, PNorm = 152.7599, GNorm = 0.1971, lr_0 = 2.1499e-04
Loss = 2.2617e-03, PNorm = 152.7641, GNorm = 0.0692, lr_0 = 2.1484e-04
Loss = 3.2058e-03, PNorm = 152.7678, GNorm = 0.0423, lr_0 = 2.1469e-04
Loss = 3.5609e-03, PNorm = 152.7699, GNorm = 0.3209, lr_0 = 2.1454e-04
Loss = 4.7476e-03, PNorm = 152.7744, GNorm = 0.0388, lr_0 = 2.1440e-04
Loss = 3.1052e-03, PNorm = 152.7799, GNorm = 0.1959, lr_0 = 2.1425e-04
Loss = 2.1168e-03, PNorm = 152.7845, GNorm = 0.1493, lr_0 = 2.1410e-04
Loss = 2.5071e-03, PNorm = 152.7883, GNorm = 0.1268, lr_0 = 2.1396e-04
Loss = 2.5623e-03, PNorm = 152.7939, GNorm = 0.0436, lr_0 = 2.1381e-04
Loss = 2.2976e-03, PNorm = 152.7995, GNorm = 0.1032, lr_0 = 2.1366e-04
Loss = 2.6049e-03, PNorm = 152.8048, GNorm = 0.1438, lr_0 = 2.1352e-04
Loss = 3.3821e-03, PNorm = 152.8086, GNorm = 0.0888, lr_0 = 2.1337e-04
Loss = 2.5255e-03, PNorm = 152.8106, GNorm = 0.1047, lr_0 = 2.1323e-04
Loss = 1.6067e-03, PNorm = 152.8145, GNorm = 0.0811, lr_0 = 2.1308e-04
Loss = 2.2452e-03, PNorm = 152.8198, GNorm = 0.1222, lr_0 = 2.1293e-04
Loss = 1.6706e-03, PNorm = 152.8243, GNorm = 0.1481, lr_0 = 2.1279e-04
Loss = 2.8006e-03, PNorm = 152.8287, GNorm = 0.0965, lr_0 = 2.1264e-04
Loss = 2.5469e-03, PNorm = 152.8324, GNorm = 0.1016, lr_0 = 2.1250e-04
Loss = 3.2113e-03, PNorm = 152.8348, GNorm = 0.0993, lr_0 = 2.1235e-04
Loss = 2.5324e-03, PNorm = 152.8414, GNorm = 0.1956, lr_0 = 2.1221e-04
Loss = 4.8360e-03, PNorm = 152.8470, GNorm = 0.1254, lr_0 = 2.1206e-04
Loss = 3.6179e-03, PNorm = 152.8511, GNorm = 0.0984, lr_0 = 2.1191e-04
Loss = 2.2196e-03, PNorm = 152.8561, GNorm = 0.0900, lr_0 = 2.1177e-04
Loss = 5.3302e-03, PNorm = 152.8620, GNorm = 0.1942, lr_0 = 2.1162e-04
Loss = 5.6518e-03, PNorm = 152.8669, GNorm = 0.2272, lr_0 = 2.1148e-04
Loss = 3.0507e-03, PNorm = 152.8733, GNorm = 0.2487, lr_0 = 2.1133e-04
Loss = 2.6582e-03, PNorm = 152.8764, GNorm = 0.1176, lr_0 = 2.1119e-04
Loss = 3.3972e-03, PNorm = 152.8792, GNorm = 0.0840, lr_0 = 2.1104e-04
Loss = 2.6684e-03, PNorm = 152.8861, GNorm = 0.0445, lr_0 = 2.1090e-04
Loss = 2.4948e-03, PNorm = 152.8918, GNorm = 0.1063, lr_0 = 2.1076e-04
Loss = 1.9661e-03, PNorm = 152.8973, GNorm = 0.2519, lr_0 = 2.1061e-04
Loss = 2.1308e-03, PNorm = 152.9010, GNorm = 0.0547, lr_0 = 2.1047e-04
Loss = 2.4073e-03, PNorm = 152.9050, GNorm = 0.1759, lr_0 = 2.1032e-04
Loss = 4.4531e-03, PNorm = 152.9081, GNorm = 0.0872, lr_0 = 2.1018e-04
Loss = 4.0043e-03, PNorm = 152.9140, GNorm = 0.2233, lr_0 = 2.1003e-04
Loss = 1.7420e-03, PNorm = 152.9209, GNorm = 0.1624, lr_0 = 2.0989e-04
Loss = 3.5188e-03, PNorm = 152.9253, GNorm = 0.1594, lr_0 = 2.0975e-04
Loss = 2.3065e-03, PNorm = 152.9291, GNorm = 0.0842, lr_0 = 2.0960e-04
Validation mae = 0.476081
Epoch 21
Loss = 2.0519e-03, PNorm = 152.9345, GNorm = 0.0664, lr_0 = 2.0946e-04
Loss = 2.2564e-03, PNorm = 152.9390, GNorm = 0.1220, lr_0 = 2.0932e-04
Loss = 1.7553e-03, PNorm = 152.9439, GNorm = 0.2340, lr_0 = 2.0917e-04
Loss = 4.7682e-03, PNorm = 152.9470, GNorm = 0.2560, lr_0 = 2.0903e-04
Loss = 1.9547e-03, PNorm = 152.9524, GNorm = 0.1246, lr_0 = 2.0889e-04
Loss = 2.5387e-03, PNorm = 152.9548, GNorm = 0.1546, lr_0 = 2.0874e-04
Loss = 1.6796e-03, PNorm = 152.9578, GNorm = 0.0439, lr_0 = 2.0860e-04
Loss = 1.7348e-03, PNorm = 152.9599, GNorm = 0.1158, lr_0 = 2.0846e-04
Loss = 2.5181e-03, PNorm = 152.9636, GNorm = 0.0854, lr_0 = 2.0831e-04
Loss = 2.6122e-03, PNorm = 152.9679, GNorm = 0.2360, lr_0 = 2.0817e-04
Loss = 2.5726e-03, PNorm = 152.9738, GNorm = 0.1462, lr_0 = 2.0803e-04
Loss = 1.7968e-03, PNorm = 152.9793, GNorm = 0.0769, lr_0 = 2.0789e-04
Loss = 1.8052e-03, PNorm = 152.9822, GNorm = 0.1879, lr_0 = 2.0774e-04
Loss = 2.2249e-03, PNorm = 152.9853, GNorm = 0.0910, lr_0 = 2.0760e-04
Loss = 2.3310e-03, PNorm = 152.9894, GNorm = 0.0458, lr_0 = 2.0746e-04
Loss = 2.7164e-03, PNorm = 152.9922, GNorm = 0.0877, lr_0 = 2.0732e-04
Loss = 3.0040e-03, PNorm = 152.9934, GNorm = 0.3523, lr_0 = 2.0718e-04
Loss = 1.9061e-03, PNorm = 152.9955, GNorm = 0.1996, lr_0 = 2.0703e-04
Loss = 4.1949e-03, PNorm = 153.0006, GNorm = 0.0666, lr_0 = 2.0689e-04
Loss = 1.9274e-03, PNorm = 153.0045, GNorm = 0.1118, lr_0 = 2.0675e-04
Loss = 3.2069e-03, PNorm = 153.0092, GNorm = 0.3145, lr_0 = 2.0661e-04
Loss = 1.8431e-03, PNorm = 153.0127, GNorm = 0.0378, lr_0 = 2.0647e-04
Loss = 1.2818e-03, PNorm = 153.0172, GNorm = 0.1069, lr_0 = 2.0633e-04
Loss = 1.7753e-03, PNorm = 153.0198, GNorm = 0.0324, lr_0 = 2.0618e-04
Loss = 1.9868e-03, PNorm = 153.0246, GNorm = 0.0670, lr_0 = 2.0604e-04
Loss = 1.7976e-03, PNorm = 153.0284, GNorm = 0.1579, lr_0 = 2.0590e-04
Loss = 2.1487e-03, PNorm = 153.0310, GNorm = 0.2294, lr_0 = 2.0576e-04
Loss = 2.1389e-03, PNorm = 153.0374, GNorm = 0.1314, lr_0 = 2.0562e-04
Loss = 2.8202e-03, PNorm = 153.0436, GNorm = 0.0980, lr_0 = 2.0548e-04
Loss = 2.5995e-03, PNorm = 153.0510, GNorm = 0.1259, lr_0 = 2.0534e-04
Loss = 1.7280e-03, PNorm = 153.0548, GNorm = 0.1235, lr_0 = 2.0520e-04
Loss = 1.7540e-03, PNorm = 153.0607, GNorm = 0.1288, lr_0 = 2.0506e-04
Loss = 1.8864e-03, PNorm = 153.0635, GNorm = 0.1211, lr_0 = 2.0492e-04
Loss = 2.7934e-03, PNorm = 153.0674, GNorm = 0.1270, lr_0 = 2.0478e-04
Loss = 1.7097e-03, PNorm = 153.0719, GNorm = 0.1251, lr_0 = 2.0464e-04
Loss = 1.7866e-03, PNorm = 153.0752, GNorm = 0.1121, lr_0 = 2.0450e-04
Loss = 1.8518e-03, PNorm = 153.0788, GNorm = 0.0841, lr_0 = 2.0436e-04
Loss = 2.8525e-03, PNorm = 153.0818, GNorm = 0.0557, lr_0 = 2.0422e-04
Loss = 3.1748e-03, PNorm = 153.0855, GNorm = 0.0797, lr_0 = 2.0408e-04
Loss = 1.8412e-03, PNorm = 153.0885, GNorm = 0.1644, lr_0 = 2.0394e-04
Loss = 1.7723e-03, PNorm = 153.0918, GNorm = 0.0580, lr_0 = 2.0380e-04
Loss = 1.5870e-03, PNorm = 153.0937, GNorm = 0.1048, lr_0 = 2.0366e-04
Loss = 2.6451e-03, PNorm = 153.0979, GNorm = 0.0867, lr_0 = 2.0352e-04
Loss = 3.7904e-03, PNorm = 153.1048, GNorm = 0.0555, lr_0 = 2.0338e-04
Loss = 2.5818e-03, PNorm = 153.1096, GNorm = 0.1454, lr_0 = 2.0324e-04
Loss = 2.1108e-03, PNorm = 153.1147, GNorm = 0.1001, lr_0 = 2.0310e-04
Loss = 1.7647e-03, PNorm = 153.1176, GNorm = 0.0705, lr_0 = 2.0296e-04
Loss = 2.1066e-03, PNorm = 153.1194, GNorm = 0.1044, lr_0 = 2.0282e-04
Loss = 2.2551e-03, PNorm = 153.1243, GNorm = 0.1504, lr_0 = 2.0268e-04
Loss = 1.4647e-03, PNorm = 153.1306, GNorm = 0.0868, lr_0 = 2.0254e-04
Loss = 2.5266e-03, PNorm = 153.1350, GNorm = 0.1642, lr_0 = 2.0240e-04
Loss = 1.7477e-03, PNorm = 153.1392, GNorm = 0.2259, lr_0 = 2.0227e-04
Loss = 1.5104e-03, PNorm = 153.1459, GNorm = 0.0776, lr_0 = 2.0213e-04
Loss = 2.9010e-03, PNorm = 153.1513, GNorm = 0.1152, lr_0 = 2.0199e-04
Loss = 2.9185e-03, PNorm = 153.1552, GNorm = 0.1308, lr_0 = 2.0185e-04
Loss = 1.8348e-03, PNorm = 153.1575, GNorm = 0.1600, lr_0 = 2.0171e-04
Loss = 1.6251e-03, PNorm = 153.1626, GNorm = 0.0703, lr_0 = 2.0157e-04
Loss = 3.8907e-03, PNorm = 153.1654, GNorm = 0.0657, lr_0 = 2.0144e-04
Loss = 1.9666e-03, PNorm = 153.1685, GNorm = 0.1358, lr_0 = 2.0130e-04
Loss = 2.4839e-03, PNorm = 153.1732, GNorm = 0.1517, lr_0 = 2.0116e-04
Loss = 1.8291e-03, PNorm = 153.1781, GNorm = 0.0363, lr_0 = 2.0102e-04
Loss = 2.4904e-03, PNorm = 153.1820, GNorm = 0.0456, lr_0 = 2.0088e-04
Loss = 2.5743e-03, PNorm = 153.1856, GNorm = 0.1783, lr_0 = 2.0075e-04
Loss = 2.5271e-03, PNorm = 153.1899, GNorm = 0.1046, lr_0 = 2.0061e-04
Loss = 4.1675e-03, PNorm = 153.1943, GNorm = 0.2476, lr_0 = 2.0047e-04
Loss = 3.6688e-03, PNorm = 153.2009, GNorm = 0.1943, lr_0 = 2.0033e-04
Loss = 2.0951e-03, PNorm = 153.2052, GNorm = 0.0791, lr_0 = 2.0020e-04
Loss = 1.4204e-03, PNorm = 153.2088, GNorm = 0.1018, lr_0 = 2.0006e-04
Loss = 1.8889e-03, PNorm = 153.2141, GNorm = 0.0524, lr_0 = 1.9992e-04
Loss = 1.5996e-03, PNorm = 153.2187, GNorm = 0.0852, lr_0 = 1.9979e-04
Loss = 3.2289e-03, PNorm = 153.2221, GNorm = 0.0702, lr_0 = 1.9965e-04
Loss = 1.8669e-03, PNorm = 153.2265, GNorm = 0.1839, lr_0 = 1.9951e-04
Loss = 1.9995e-03, PNorm = 153.2307, GNorm = 0.1807, lr_0 = 1.9938e-04
Loss = 1.8824e-03, PNorm = 153.2339, GNorm = 0.1205, lr_0 = 1.9924e-04
Loss = 2.0121e-03, PNorm = 153.2378, GNorm = 0.0981, lr_0 = 1.9910e-04
Loss = 1.5733e-03, PNorm = 153.2400, GNorm = 0.1460, lr_0 = 1.9897e-04
Loss = 1.3012e-03, PNorm = 153.2416, GNorm = 0.0298, lr_0 = 1.9883e-04
Loss = 3.5895e-03, PNorm = 153.2443, GNorm = 0.1157, lr_0 = 1.9869e-04
Loss = 2.9938e-03, PNorm = 153.2476, GNorm = 0.2624, lr_0 = 1.9856e-04
Loss = 1.4629e-03, PNorm = 153.2503, GNorm = 0.0474, lr_0 = 1.9842e-04
Loss = 1.5695e-03, PNorm = 153.2549, GNorm = 0.1583, lr_0 = 1.9829e-04
Loss = 1.8930e-03, PNorm = 153.2588, GNorm = 0.2869, lr_0 = 1.9815e-04
Loss = 1.8961e-03, PNorm = 153.2613, GNorm = 0.2065, lr_0 = 1.9801e-04
Loss = 2.4308e-03, PNorm = 153.2677, GNorm = 0.1012, lr_0 = 1.9788e-04
Loss = 2.7289e-03, PNorm = 153.2703, GNorm = 0.1945, lr_0 = 1.9774e-04
Loss = 2.2981e-03, PNorm = 153.2748, GNorm = 0.1281, lr_0 = 1.9761e-04
Loss = 1.4415e-03, PNorm = 153.2797, GNorm = 0.1419, lr_0 = 1.9747e-04
Loss = 1.9353e-03, PNorm = 153.2849, GNorm = 0.0761, lr_0 = 1.9734e-04
Loss = 1.8183e-03, PNorm = 153.2937, GNorm = 0.2597, lr_0 = 1.9720e-04
Loss = 3.2390e-03, PNorm = 153.2967, GNorm = 0.2780, lr_0 = 1.9707e-04
Loss = 3.6328e-03, PNorm = 153.3019, GNorm = 0.1896, lr_0 = 1.9693e-04
Loss = 1.4715e-03, PNorm = 153.3042, GNorm = 0.0944, lr_0 = 1.9680e-04
Loss = 4.4031e-03, PNorm = 153.3077, GNorm = 0.2231, lr_0 = 1.9666e-04
Loss = 3.0170e-03, PNorm = 153.3107, GNorm = 0.2912, lr_0 = 1.9653e-04
Loss = 1.7309e-03, PNorm = 153.3150, GNorm = 0.0639, lr_0 = 1.9639e-04
Loss = 1.5435e-03, PNorm = 153.3191, GNorm = 0.1742, lr_0 = 1.9626e-04
Loss = 3.4347e-03, PNorm = 153.3237, GNorm = 0.4152, lr_0 = 1.9612e-04
Loss = 2.0464e-03, PNorm = 153.3269, GNorm = 0.2166, lr_0 = 1.9599e-04
Loss = 3.4733e-03, PNorm = 153.3326, GNorm = 0.1662, lr_0 = 1.9585e-04
Loss = 2.3007e-03, PNorm = 153.3380, GNorm = 0.1550, lr_0 = 1.9572e-04
Loss = 2.3061e-03, PNorm = 153.3418, GNorm = 0.1124, lr_0 = 1.9559e-04
Loss = 4.1415e-03, PNorm = 153.3446, GNorm = 0.0518, lr_0 = 1.9545e-04
Loss = 2.2863e-03, PNorm = 153.3514, GNorm = 0.1379, lr_0 = 1.9532e-04
Loss = 2.4162e-03, PNorm = 153.3560, GNorm = 0.1741, lr_0 = 1.9518e-04
Loss = 1.7486e-03, PNorm = 153.3588, GNorm = 0.0755, lr_0 = 1.9505e-04
Loss = 4.1453e-03, PNorm = 153.3617, GNorm = 0.0822, lr_0 = 1.9492e-04
Loss = 2.1139e-03, PNorm = 153.3662, GNorm = 0.1118, lr_0 = 1.9478e-04
Loss = 4.2842e-03, PNorm = 153.3713, GNorm = 0.4298, lr_0 = 1.9465e-04
Loss = 1.8959e-03, PNorm = 153.3747, GNorm = 0.1470, lr_0 = 1.9452e-04
Loss = 1.6424e-03, PNorm = 153.3779, GNorm = 0.1048, lr_0 = 1.9438e-04
Loss = 1.9971e-03, PNorm = 153.3804, GNorm = 0.1439, lr_0 = 1.9425e-04
Loss = 1.8392e-03, PNorm = 153.3808, GNorm = 0.0978, lr_0 = 1.9412e-04
Loss = 2.4317e-03, PNorm = 153.3840, GNorm = 0.0819, lr_0 = 1.9398e-04
Loss = 2.0534e-03, PNorm = 153.3891, GNorm = 0.0671, lr_0 = 1.9385e-04
Loss = 2.3838e-03, PNorm = 153.3926, GNorm = 0.0748, lr_0 = 1.9372e-04
Loss = 2.0861e-03, PNorm = 153.3954, GNorm = 0.2496, lr_0 = 1.9359e-04
Loss = 2.0462e-03, PNorm = 153.3980, GNorm = 0.4258, lr_0 = 1.9345e-04
Loss = 4.0366e-03, PNorm = 153.4027, GNorm = 0.2138, lr_0 = 1.9332e-04
Loss = 2.1429e-03, PNorm = 153.4078, GNorm = 0.1008, lr_0 = 1.9319e-04
Loss = 2.6901e-03, PNorm = 153.4126, GNorm = 0.1285, lr_0 = 1.9306e-04
Validation mae = 0.475439
Epoch 22
Loss = 3.0078e-03, PNorm = 153.4150, GNorm = 0.0601, lr_0 = 1.9292e-04
Loss = 1.7368e-03, PNorm = 153.4176, GNorm = 0.0995, lr_0 = 1.9279e-04
Loss = 1.6314e-03, PNorm = 153.4207, GNorm = 0.0817, lr_0 = 1.9266e-04
Loss = 3.1889e-03, PNorm = 153.4221, GNorm = 0.1012, lr_0 = 1.9253e-04
Loss = 2.2245e-03, PNorm = 153.4246, GNorm = 0.0741, lr_0 = 1.9240e-04
Loss = 1.8637e-03, PNorm = 153.4273, GNorm = 0.1830, lr_0 = 1.9226e-04
Loss = 2.2608e-03, PNorm = 153.4324, GNorm = 0.1176, lr_0 = 1.9213e-04
Loss = 2.0820e-03, PNorm = 153.4365, GNorm = 0.1700, lr_0 = 1.9200e-04
Loss = 4.5544e-03, PNorm = 153.4400, GNorm = 0.2680, lr_0 = 1.9187e-04
Loss = 2.2168e-03, PNorm = 153.4428, GNorm = 0.1148, lr_0 = 1.9174e-04
Loss = 1.8386e-03, PNorm = 153.4492, GNorm = 0.1623, lr_0 = 1.9161e-04
Loss = 1.7566e-03, PNorm = 153.4541, GNorm = 0.0858, lr_0 = 1.9148e-04
Loss = 1.4994e-03, PNorm = 153.4587, GNorm = 0.1135, lr_0 = 1.9134e-04
Loss = 2.3580e-03, PNorm = 153.4648, GNorm = 0.1206, lr_0 = 1.9121e-04
Loss = 1.9378e-03, PNorm = 153.4658, GNorm = 0.1475, lr_0 = 1.9108e-04
Loss = 3.0342e-03, PNorm = 153.4672, GNorm = 0.1314, lr_0 = 1.9095e-04
Loss = 2.0307e-03, PNorm = 153.4685, GNorm = 0.2444, lr_0 = 1.9082e-04
Loss = 1.6673e-03, PNorm = 153.4733, GNorm = 0.1296, lr_0 = 1.9069e-04
Loss = 1.5212e-03, PNorm = 153.4770, GNorm = 0.1081, lr_0 = 1.9056e-04
Loss = 1.5679e-03, PNorm = 153.4802, GNorm = 0.0384, lr_0 = 1.9043e-04
Loss = 2.5588e-03, PNorm = 153.4841, GNorm = 0.1490, lr_0 = 1.9030e-04
Loss = 1.5258e-03, PNorm = 153.4877, GNorm = 0.1693, lr_0 = 1.9017e-04
Loss = 1.3770e-03, PNorm = 153.4881, GNorm = 0.1589, lr_0 = 1.9004e-04
Loss = 1.7059e-03, PNorm = 153.4898, GNorm = 0.0972, lr_0 = 1.8991e-04
Loss = 1.3665e-03, PNorm = 153.4931, GNorm = 0.1306, lr_0 = 1.8978e-04
Loss = 1.8662e-03, PNorm = 153.4976, GNorm = 0.1941, lr_0 = 1.8965e-04
Loss = 1.4171e-03, PNorm = 153.5022, GNorm = 0.1071, lr_0 = 1.8952e-04
Loss = 2.1485e-03, PNorm = 153.5046, GNorm = 0.0761, lr_0 = 1.8939e-04
Loss = 2.9419e-03, PNorm = 153.5088, GNorm = 0.1759, lr_0 = 1.8926e-04
Loss = 5.4542e-03, PNorm = 153.5119, GNorm = 0.3611, lr_0 = 1.8913e-04
Loss = 2.1047e-03, PNorm = 153.5167, GNorm = 0.1105, lr_0 = 1.8900e-04
Loss = 2.7719e-03, PNorm = 153.5205, GNorm = 0.1544, lr_0 = 1.8887e-04
Loss = 1.7356e-03, PNorm = 153.5212, GNorm = 0.1923, lr_0 = 1.8874e-04
Loss = 1.4613e-03, PNorm = 153.5242, GNorm = 0.1767, lr_0 = 1.8861e-04
Loss = 1.5006e-03, PNorm = 153.5263, GNorm = 0.0527, lr_0 = 1.8848e-04
Loss = 1.9501e-03, PNorm = 153.5301, GNorm = 0.0968, lr_0 = 1.8835e-04
Loss = 1.2132e-03, PNorm = 153.5331, GNorm = 0.0744, lr_0 = 1.8822e-04
Loss = 1.4442e-03, PNorm = 153.5339, GNorm = 0.1709, lr_0 = 1.8809e-04
Loss = 1.5555e-03, PNorm = 153.5376, GNorm = 0.2107, lr_0 = 1.8797e-04
Loss = 1.8897e-03, PNorm = 153.5390, GNorm = 0.1236, lr_0 = 1.8784e-04
Loss = 1.6575e-03, PNorm = 153.5435, GNorm = 0.0987, lr_0 = 1.8771e-04
Loss = 1.5156e-03, PNorm = 153.5492, GNorm = 0.1164, lr_0 = 1.8758e-04
Loss = 1.8863e-03, PNorm = 153.5525, GNorm = 0.1814, lr_0 = 1.8745e-04
Loss = 2.7370e-03, PNorm = 153.5566, GNorm = 0.0700, lr_0 = 1.8732e-04
Loss = 1.6117e-03, PNorm = 153.5594, GNorm = 0.0527, lr_0 = 1.8719e-04
Loss = 2.0031e-03, PNorm = 153.5631, GNorm = 0.0868, lr_0 = 1.8707e-04
Loss = 1.7574e-03, PNorm = 153.5687, GNorm = 0.0824, lr_0 = 1.8694e-04
Loss = 2.9294e-03, PNorm = 153.5719, GNorm = 0.1723, lr_0 = 1.8681e-04
Loss = 1.4028e-03, PNorm = 153.5753, GNorm = 0.0659, lr_0 = 1.8668e-04
Loss = 2.5972e-03, PNorm = 153.5798, GNorm = 0.1437, lr_0 = 1.8655e-04
Loss = 1.4159e-03, PNorm = 153.5812, GNorm = 0.2093, lr_0 = 1.8643e-04
Loss = 3.5241e-03, PNorm = 153.5841, GNorm = 0.1048, lr_0 = 1.8630e-04
Loss = 2.0092e-03, PNorm = 153.5873, GNorm = 0.1166, lr_0 = 1.8617e-04
Loss = 1.7709e-03, PNorm = 153.5901, GNorm = 0.0573, lr_0 = 1.8604e-04
Loss = 1.5779e-03, PNorm = 153.5942, GNorm = 0.0857, lr_0 = 1.8592e-04
Loss = 1.7965e-03, PNorm = 153.5967, GNorm = 0.0930, lr_0 = 1.8579e-04
Loss = 2.0753e-03, PNorm = 153.6015, GNorm = 0.0918, lr_0 = 1.8566e-04
Loss = 1.3912e-03, PNorm = 153.6055, GNorm = 0.0589, lr_0 = 1.8553e-04
Loss = 1.5457e-03, PNorm = 153.6091, GNorm = 0.1117, lr_0 = 1.8541e-04
Loss = 2.0397e-03, PNorm = 153.6129, GNorm = 0.1294, lr_0 = 1.8528e-04
Loss = 2.0367e-03, PNorm = 153.6172, GNorm = 0.2274, lr_0 = 1.8515e-04
Loss = 1.4091e-03, PNorm = 153.6202, GNorm = 0.1372, lr_0 = 1.8503e-04
Loss = 1.9771e-03, PNorm = 153.6243, GNorm = 0.1225, lr_0 = 1.8490e-04
Loss = 1.5669e-03, PNorm = 153.6260, GNorm = 0.1942, lr_0 = 1.8477e-04
Loss = 2.7513e-03, PNorm = 153.6297, GNorm = 0.1883, lr_0 = 1.8465e-04
Loss = 1.4598e-03, PNorm = 153.6337, GNorm = 0.0367, lr_0 = 1.8452e-04
Loss = 1.5412e-03, PNorm = 153.6400, GNorm = 0.0586, lr_0 = 1.8439e-04
Loss = 1.4589e-03, PNorm = 153.6443, GNorm = 0.0678, lr_0 = 1.8427e-04
Loss = 1.7348e-03, PNorm = 153.6461, GNorm = 0.1353, lr_0 = 1.8414e-04
Loss = 2.3971e-03, PNorm = 153.6483, GNorm = 0.0943, lr_0 = 1.8401e-04
Loss = 1.5583e-03, PNorm = 153.6506, GNorm = 0.0681, lr_0 = 1.8389e-04
Loss = 2.1885e-03, PNorm = 153.6536, GNorm = 0.1631, lr_0 = 1.8376e-04
Loss = 3.5566e-03, PNorm = 153.6573, GNorm = 0.2045, lr_0 = 1.8364e-04
Loss = 3.4116e-03, PNorm = 153.6611, GNorm = 0.0814, lr_0 = 1.8351e-04
Loss = 1.8396e-03, PNorm = 153.6627, GNorm = 0.1756, lr_0 = 1.8338e-04
Loss = 2.6595e-03, PNorm = 153.6673, GNorm = 0.1849, lr_0 = 1.8326e-04
Loss = 1.5120e-03, PNorm = 153.6716, GNorm = 0.0642, lr_0 = 1.8313e-04
Loss = 2.0555e-03, PNorm = 153.6753, GNorm = 0.1471, lr_0 = 1.8301e-04
Loss = 1.6988e-03, PNorm = 153.6792, GNorm = 0.1445, lr_0 = 1.8288e-04
Loss = 3.5766e-03, PNorm = 153.6837, GNorm = 0.0564, lr_0 = 1.8276e-04
Loss = 1.3718e-03, PNorm = 153.6887, GNorm = 0.0740, lr_0 = 1.8263e-04
Loss = 1.8369e-03, PNorm = 153.6917, GNorm = 0.1007, lr_0 = 1.8251e-04
Loss = 2.5795e-03, PNorm = 153.6945, GNorm = 0.1458, lr_0 = 1.8238e-04
Loss = 2.3823e-03, PNorm = 153.6985, GNorm = 0.0825, lr_0 = 1.8226e-04
Loss = 1.5064e-03, PNorm = 153.7011, GNorm = 0.0962, lr_0 = 1.8213e-04
Loss = 1.6667e-03, PNorm = 153.7039, GNorm = 0.1293, lr_0 = 1.8201e-04
Loss = 1.9176e-03, PNorm = 153.7064, GNorm = 0.3205, lr_0 = 1.8188e-04
Loss = 2.5839e-03, PNorm = 153.7086, GNorm = 0.3230, lr_0 = 1.8176e-04
Loss = 1.7399e-03, PNorm = 153.7096, GNorm = 0.1448, lr_0 = 1.8163e-04
Loss = 1.6510e-03, PNorm = 153.7133, GNorm = 0.0820, lr_0 = 1.8151e-04
Loss = 3.3357e-03, PNorm = 153.7169, GNorm = 0.1815, lr_0 = 1.8138e-04
Loss = 1.7181e-03, PNorm = 153.7210, GNorm = 0.1952, lr_0 = 1.8126e-04
Loss = 1.4408e-03, PNorm = 153.7251, GNorm = 0.2237, lr_0 = 1.8114e-04
Loss = 1.2649e-03, PNorm = 153.7289, GNorm = 0.1250, lr_0 = 1.8101e-04
Loss = 2.2950e-03, PNorm = 153.7324, GNorm = 0.1686, lr_0 = 1.8089e-04
Loss = 1.3227e-03, PNorm = 153.7365, GNorm = 0.0957, lr_0 = 1.8076e-04
Loss = 4.4594e-03, PNorm = 153.7404, GNorm = 0.2058, lr_0 = 1.8064e-04
Loss = 1.5150e-03, PNorm = 153.7429, GNorm = 0.1010, lr_0 = 1.8052e-04
Loss = 3.5107e-03, PNorm = 153.7469, GNorm = 0.1750, lr_0 = 1.8039e-04
Loss = 1.9512e-03, PNorm = 153.7499, GNorm = 0.1323, lr_0 = 1.8027e-04
Loss = 2.0421e-03, PNorm = 153.7515, GNorm = 0.1770, lr_0 = 1.8015e-04
Loss = 3.1234e-03, PNorm = 153.7556, GNorm = 0.1724, lr_0 = 1.8002e-04
Loss = 1.4956e-03, PNorm = 153.7594, GNorm = 0.0496, lr_0 = 1.7990e-04
Loss = 2.6850e-03, PNorm = 153.7623, GNorm = 0.1430, lr_0 = 1.7978e-04
Loss = 1.3478e-03, PNorm = 153.7636, GNorm = 0.2664, lr_0 = 1.7965e-04
Loss = 2.6368e-03, PNorm = 153.7668, GNorm = 0.0652, lr_0 = 1.7953e-04
Loss = 2.3539e-03, PNorm = 153.7724, GNorm = 0.1982, lr_0 = 1.7941e-04
Loss = 1.3022e-03, PNorm = 153.7778, GNorm = 0.0550, lr_0 = 1.7928e-04
Loss = 1.7715e-03, PNorm = 153.7805, GNorm = 0.1229, lr_0 = 1.7916e-04
Loss = 1.4932e-03, PNorm = 153.7821, GNorm = 0.0609, lr_0 = 1.7904e-04
Loss = 1.9588e-03, PNorm = 153.7856, GNorm = 0.1532, lr_0 = 1.7892e-04
Loss = 1.8848e-03, PNorm = 153.7882, GNorm = 0.1022, lr_0 = 1.7879e-04
Loss = 1.7465e-03, PNorm = 153.7953, GNorm = 0.0561, lr_0 = 1.7867e-04
Loss = 1.2016e-03, PNorm = 153.7990, GNorm = 0.0530, lr_0 = 1.7855e-04
Loss = 2.6692e-03, PNorm = 153.8053, GNorm = 0.0675, lr_0 = 1.7843e-04
Loss = 1.4273e-03, PNorm = 153.8099, GNorm = 0.4002, lr_0 = 1.7830e-04
Loss = 2.6131e-03, PNorm = 153.8120, GNorm = 0.0887, lr_0 = 1.7818e-04
Loss = 4.4839e-03, PNorm = 153.8148, GNorm = 0.1574, lr_0 = 1.7806e-04
Loss = 3.8537e-03, PNorm = 153.8189, GNorm = 0.3282, lr_0 = 1.7794e-04
Loss = 3.9938e-03, PNorm = 153.8214, GNorm = 0.2253, lr_0 = 1.7782e-04
Validation mae = 0.475325
Epoch 23
Loss = 2.2001e-03, PNorm = 153.8231, GNorm = 0.0848, lr_0 = 1.7769e-04
Loss = 1.3453e-03, PNorm = 153.8274, GNorm = 0.0971, lr_0 = 1.7757e-04
Loss = 1.4492e-03, PNorm = 153.8307, GNorm = 0.1360, lr_0 = 1.7745e-04
Loss = 1.7435e-03, PNorm = 153.8333, GNorm = 0.1572, lr_0 = 1.7733e-04
Loss = 1.4495e-03, PNorm = 153.8366, GNorm = 0.0295, lr_0 = 1.7721e-04
Loss = 1.8907e-03, PNorm = 153.8390, GNorm = 0.0965, lr_0 = 1.7709e-04
Loss = 1.6706e-03, PNorm = 153.8409, GNorm = 0.2657, lr_0 = 1.7696e-04
Loss = 1.1807e-03, PNorm = 153.8426, GNorm = 0.0910, lr_0 = 1.7684e-04
Loss = 1.4414e-03, PNorm = 153.8475, GNorm = 0.1812, lr_0 = 1.7672e-04
Loss = 1.4626e-03, PNorm = 153.8511, GNorm = 0.0922, lr_0 = 1.7660e-04
Loss = 1.4882e-03, PNorm = 153.8519, GNorm = 0.1252, lr_0 = 1.7648e-04
Loss = 1.7678e-03, PNorm = 153.8517, GNorm = 0.1077, lr_0 = 1.7636e-04
Loss = 1.5077e-03, PNorm = 153.8541, GNorm = 0.1563, lr_0 = 1.7624e-04
Loss = 2.0325e-03, PNorm = 153.8568, GNorm = 0.1934, lr_0 = 1.7612e-04
Loss = 1.4116e-03, PNorm = 153.8614, GNorm = 0.1256, lr_0 = 1.7600e-04
Loss = 2.6634e-03, PNorm = 153.8647, GNorm = 0.2360, lr_0 = 1.7588e-04
Loss = 1.8406e-03, PNorm = 153.8690, GNorm = 0.1920, lr_0 = 1.7576e-04
Loss = 1.5808e-03, PNorm = 153.8710, GNorm = 0.0421, lr_0 = 1.7564e-04
Loss = 1.1289e-03, PNorm = 153.8728, GNorm = 0.0725, lr_0 = 1.7552e-04
Loss = 1.0963e-03, PNorm = 153.8759, GNorm = 0.0552, lr_0 = 1.7540e-04
Loss = 1.8753e-03, PNorm = 153.8768, GNorm = 0.1564, lr_0 = 1.7528e-04
Loss = 2.0966e-03, PNorm = 153.8789, GNorm = 0.1033, lr_0 = 1.7516e-04
Loss = 1.5499e-03, PNorm = 153.8801, GNorm = 0.0673, lr_0 = 1.7504e-04
Loss = 1.8619e-03, PNorm = 153.8808, GNorm = 0.1964, lr_0 = 1.7492e-04
Loss = 1.3577e-03, PNorm = 153.8833, GNorm = 0.0889, lr_0 = 1.7480e-04
Loss = 1.2896e-03, PNorm = 153.8873, GNorm = 0.3346, lr_0 = 1.7468e-04
Loss = 3.0932e-03, PNorm = 153.8924, GNorm = 0.1872, lr_0 = 1.7456e-04
Loss = 1.1472e-03, PNorm = 153.8978, GNorm = 0.1034, lr_0 = 1.7444e-04
Loss = 1.6220e-03, PNorm = 153.8998, GNorm = 0.2210, lr_0 = 1.7432e-04
Loss = 1.6935e-03, PNorm = 153.9031, GNorm = 0.0771, lr_0 = 1.7420e-04
Loss = 1.3660e-03, PNorm = 153.9047, GNorm = 0.0324, lr_0 = 1.7408e-04
Loss = 6.1121e-03, PNorm = 153.9043, GNorm = 0.2454, lr_0 = 1.7396e-04
Loss = 1.9926e-03, PNorm = 153.9089, GNorm = 0.2034, lr_0 = 1.7384e-04
Loss = 1.9365e-03, PNorm = 153.9123, GNorm = 0.0772, lr_0 = 1.7372e-04
Loss = 2.9126e-03, PNorm = 153.9183, GNorm = 0.0681, lr_0 = 1.7360e-04
Loss = 2.3919e-03, PNorm = 153.9191, GNorm = 0.0628, lr_0 = 1.7348e-04
Loss = 2.2856e-03, PNorm = 153.9197, GNorm = 0.0791, lr_0 = 1.7336e-04
Loss = 3.9119e-03, PNorm = 153.9209, GNorm = 0.3115, lr_0 = 1.7325e-04
Loss = 1.5303e-03, PNorm = 153.9261, GNorm = 0.2275, lr_0 = 1.7313e-04
Loss = 1.3394e-03, PNorm = 153.9295, GNorm = 0.1522, lr_0 = 1.7301e-04
Loss = 1.5349e-03, PNorm = 153.9338, GNorm = 0.0654, lr_0 = 1.7289e-04
Loss = 2.3944e-03, PNorm = 153.9366, GNorm = 0.0779, lr_0 = 1.7277e-04
Loss = 1.5383e-03, PNorm = 153.9406, GNorm = 0.0994, lr_0 = 1.7265e-04
Loss = 2.0683e-03, PNorm = 153.9427, GNorm = 0.0958, lr_0 = 1.7253e-04
Loss = 1.1655e-03, PNorm = 153.9444, GNorm = 0.0383, lr_0 = 1.7242e-04
Loss = 1.0935e-03, PNorm = 153.9466, GNorm = 0.1005, lr_0 = 1.7230e-04
Loss = 2.0120e-03, PNorm = 153.9478, GNorm = 0.0367, lr_0 = 1.7218e-04
Loss = 1.2014e-03, PNorm = 153.9498, GNorm = 0.0755, lr_0 = 1.7206e-04
Loss = 1.4921e-03, PNorm = 153.9512, GNorm = 0.0935, lr_0 = 1.7194e-04
Loss = 2.9093e-03, PNorm = 153.9530, GNorm = 0.2026, lr_0 = 1.7183e-04
Loss = 1.5704e-03, PNorm = 153.9576, GNorm = 0.0570, lr_0 = 1.7171e-04
Loss = 2.1378e-03, PNorm = 153.9603, GNorm = 0.0377, lr_0 = 1.7159e-04
Loss = 2.1408e-03, PNorm = 153.9614, GNorm = 0.0853, lr_0 = 1.7147e-04
Loss = 1.9581e-03, PNorm = 153.9630, GNorm = 0.1315, lr_0 = 1.7136e-04
Loss = 4.9388e-03, PNorm = 153.9668, GNorm = 0.2836, lr_0 = 1.7124e-04
Loss = 3.4389e-03, PNorm = 153.9707, GNorm = 0.1102, lr_0 = 1.7112e-04
Loss = 2.5271e-03, PNorm = 153.9713, GNorm = 0.1277, lr_0 = 1.7100e-04
Loss = 1.5410e-03, PNorm = 153.9736, GNorm = 0.0867, lr_0 = 1.7089e-04
Loss = 2.1678e-03, PNorm = 153.9795, GNorm = 0.1567, lr_0 = 1.7077e-04
Loss = 1.2535e-03, PNorm = 153.9855, GNorm = 0.1658, lr_0 = 1.7065e-04
Loss = 2.1522e-03, PNorm = 153.9880, GNorm = 0.1416, lr_0 = 1.7054e-04
Loss = 3.7120e-03, PNorm = 153.9909, GNorm = 0.3066, lr_0 = 1.7042e-04
Loss = 1.0952e-03, PNorm = 153.9937, GNorm = 0.0613, lr_0 = 1.7030e-04
Loss = 1.6946e-03, PNorm = 153.9982, GNorm = 0.1292, lr_0 = 1.7019e-04
Loss = 1.6514e-03, PNorm = 154.0026, GNorm = 0.1687, lr_0 = 1.7007e-04
Loss = 1.9715e-03, PNorm = 154.0055, GNorm = 0.2203, lr_0 = 1.6995e-04
Loss = 1.0789e-03, PNorm = 154.0098, GNorm = 0.0767, lr_0 = 1.6984e-04
Loss = 1.0623e-03, PNorm = 154.0123, GNorm = 0.0368, lr_0 = 1.6972e-04
Loss = 1.5513e-03, PNorm = 154.0156, GNorm = 0.0675, lr_0 = 1.6960e-04
Loss = 1.2266e-03, PNorm = 154.0201, GNorm = 0.1554, lr_0 = 1.6949e-04
Loss = 1.2123e-03, PNorm = 154.0231, GNorm = 0.1341, lr_0 = 1.6937e-04
Loss = 2.6485e-03, PNorm = 154.0218, GNorm = 0.3893, lr_0 = 1.6926e-04
Loss = 2.0139e-03, PNorm = 154.0222, GNorm = 0.0934, lr_0 = 1.6914e-04
Loss = 2.4092e-03, PNorm = 154.0229, GNorm = 0.0833, lr_0 = 1.6902e-04
Loss = 1.4379e-03, PNorm = 154.0269, GNorm = 0.0720, lr_0 = 1.6891e-04
Loss = 1.1121e-03, PNorm = 154.0316, GNorm = 0.1158, lr_0 = 1.6879e-04
Loss = 1.4840e-03, PNorm = 154.0340, GNorm = 0.0438, lr_0 = 1.6868e-04
Loss = 1.1621e-03, PNorm = 154.0362, GNorm = 0.0970, lr_0 = 1.6856e-04
Loss = 1.1116e-03, PNorm = 154.0401, GNorm = 0.0646, lr_0 = 1.6845e-04
Loss = 1.9476e-03, PNorm = 154.0438, GNorm = 0.0476, lr_0 = 1.6833e-04
Loss = 1.0913e-03, PNorm = 154.0466, GNorm = 0.0787, lr_0 = 1.6821e-04
Loss = 1.0826e-03, PNorm = 154.0497, GNorm = 0.0876, lr_0 = 1.6810e-04
Loss = 2.4886e-03, PNorm = 154.0518, GNorm = 0.1182, lr_0 = 1.6798e-04
Loss = 1.8144e-03, PNorm = 154.0551, GNorm = 0.0584, lr_0 = 1.6787e-04
Loss = 2.5006e-03, PNorm = 154.0559, GNorm = 0.1017, lr_0 = 1.6775e-04
Loss = 1.1006e-03, PNorm = 154.0577, GNorm = 0.1844, lr_0 = 1.6764e-04
Loss = 2.0653e-03, PNorm = 154.0611, GNorm = 0.0796, lr_0 = 1.6752e-04
Loss = 1.0991e-03, PNorm = 154.0666, GNorm = 0.0368, lr_0 = 1.6741e-04
Loss = 1.3092e-03, PNorm = 154.0711, GNorm = 0.0926, lr_0 = 1.6729e-04
Loss = 1.1151e-03, PNorm = 154.0740, GNorm = 0.0362, lr_0 = 1.6718e-04
Loss = 1.1280e-03, PNorm = 154.0764, GNorm = 0.1172, lr_0 = 1.6707e-04
Loss = 1.9080e-03, PNorm = 154.0786, GNorm = 0.0852, lr_0 = 1.6695e-04
Loss = 1.2133e-03, PNorm = 154.0811, GNorm = 0.1393, lr_0 = 1.6684e-04
Loss = 1.4315e-03, PNorm = 154.0834, GNorm = 0.0963, lr_0 = 1.6672e-04
Loss = 9.3599e-04, PNorm = 154.0836, GNorm = 0.2131, lr_0 = 1.6661e-04
Loss = 1.5629e-03, PNorm = 154.0841, GNorm = 0.0595, lr_0 = 1.6649e-04
Loss = 2.6811e-03, PNorm = 154.0868, GNorm = 0.0727, lr_0 = 1.6638e-04
Loss = 1.1271e-03, PNorm = 154.0881, GNorm = 0.0661, lr_0 = 1.6627e-04
Loss = 1.1608e-03, PNorm = 154.0906, GNorm = 0.2093, lr_0 = 1.6615e-04
Loss = 1.1484e-03, PNorm = 154.0932, GNorm = 0.0522, lr_0 = 1.6604e-04
Loss = 1.6754e-03, PNorm = 154.0965, GNorm = 0.2203, lr_0 = 1.6592e-04
Loss = 3.3445e-03, PNorm = 154.0996, GNorm = 0.0484, lr_0 = 1.6581e-04
Loss = 2.0060e-03, PNorm = 154.1017, GNorm = 0.0827, lr_0 = 1.6570e-04
Loss = 2.1691e-03, PNorm = 154.1048, GNorm = 0.0433, lr_0 = 1.6558e-04
Loss = 2.7802e-03, PNorm = 154.1089, GNorm = 0.1763, lr_0 = 1.6547e-04
Loss = 2.2436e-03, PNorm = 154.1128, GNorm = 0.1373, lr_0 = 1.6536e-04
Loss = 1.2499e-03, PNorm = 154.1179, GNorm = 0.0527, lr_0 = 1.6524e-04
Loss = 1.5429e-03, PNorm = 154.1206, GNorm = 0.1330, lr_0 = 1.6513e-04
Loss = 2.5966e-03, PNorm = 154.1194, GNorm = 0.1958, lr_0 = 1.6502e-04
Loss = 2.3487e-03, PNorm = 154.1205, GNorm = 0.0323, lr_0 = 1.6490e-04
Loss = 9.5414e-04, PNorm = 154.1236, GNorm = 0.1320, lr_0 = 1.6479e-04
Loss = 1.2050e-03, PNorm = 154.1270, GNorm = 0.0851, lr_0 = 1.6468e-04
Loss = 2.5567e-03, PNorm = 154.1322, GNorm = 0.1334, lr_0 = 1.6457e-04
Loss = 1.1769e-03, PNorm = 154.1358, GNorm = 0.1602, lr_0 = 1.6445e-04
Loss = 1.9021e-03, PNorm = 154.1388, GNorm = 0.0879, lr_0 = 1.6434e-04
Loss = 4.3440e-03, PNorm = 154.1432, GNorm = 0.0653, lr_0 = 1.6423e-04
Loss = 1.1818e-03, PNorm = 154.1466, GNorm = 0.0904, lr_0 = 1.6412e-04
Loss = 2.2427e-03, PNorm = 154.1504, GNorm = 0.0997, lr_0 = 1.6400e-04
Loss = 2.7050e-03, PNorm = 154.1525, GNorm = 0.0750, lr_0 = 1.6389e-04
Loss = 1.0644e-03, PNorm = 154.1536, GNorm = 0.1608, lr_0 = 1.6378e-04
Validation mae = 0.476123
Epoch 24
Loss = 1.2432e-03, PNorm = 154.1548, GNorm = 0.1534, lr_0 = 1.6367e-04
Loss = 1.6559e-03, PNorm = 154.1570, GNorm = 0.1180, lr_0 = 1.6355e-04
Loss = 1.2270e-03, PNorm = 154.1592, GNorm = 0.0708, lr_0 = 1.6344e-04
Loss = 1.6029e-03, PNorm = 154.1617, GNorm = 0.0615, lr_0 = 1.6333e-04
Loss = 2.1519e-03, PNorm = 154.1638, GNorm = 0.1614, lr_0 = 1.6322e-04
Loss = 1.4797e-03, PNorm = 154.1662, GNorm = 0.1421, lr_0 = 1.6311e-04
Loss = 3.6966e-03, PNorm = 154.1656, GNorm = 0.0728, lr_0 = 1.6299e-04
Loss = 1.8178e-03, PNorm = 154.1660, GNorm = 0.0405, lr_0 = 1.6288e-04
Loss = 1.5360e-03, PNorm = 154.1678, GNorm = 0.1668, lr_0 = 1.6277e-04
Loss = 1.2479e-03, PNorm = 154.1714, GNorm = 0.0552, lr_0 = 1.6266e-04
Loss = 1.1751e-03, PNorm = 154.1754, GNorm = 0.1609, lr_0 = 1.6255e-04
Loss = 1.0459e-03, PNorm = 154.1788, GNorm = 0.0875, lr_0 = 1.6244e-04
Loss = 9.9461e-04, PNorm = 154.1815, GNorm = 0.0872, lr_0 = 1.6233e-04
Loss = 1.3860e-03, PNorm = 154.1816, GNorm = 0.0948, lr_0 = 1.6221e-04
Loss = 1.6065e-03, PNorm = 154.1825, GNorm = 0.1932, lr_0 = 1.6210e-04
Loss = 1.0399e-03, PNorm = 154.1839, GNorm = 0.0708, lr_0 = 1.6199e-04
Loss = 1.6092e-03, PNorm = 154.1876, GNorm = 0.0809, lr_0 = 1.6188e-04
Loss = 1.1722e-03, PNorm = 154.1905, GNorm = 0.0372, lr_0 = 1.6177e-04
Loss = 1.2124e-03, PNorm = 154.1936, GNorm = 0.1040, lr_0 = 1.6166e-04
Loss = 1.4942e-03, PNorm = 154.1961, GNorm = 0.0705, lr_0 = 1.6155e-04
Loss = 2.0085e-03, PNorm = 154.1989, GNorm = 0.0836, lr_0 = 1.6144e-04
Loss = 1.1750e-03, PNorm = 154.1996, GNorm = 0.0496, lr_0 = 1.6133e-04
Loss = 1.9628e-03, PNorm = 154.2017, GNorm = 0.1644, lr_0 = 1.6122e-04
Loss = 1.2774e-03, PNorm = 154.2045, GNorm = 0.0690, lr_0 = 1.6111e-04
Loss = 1.4154e-03, PNorm = 154.2043, GNorm = 0.1042, lr_0 = 1.6100e-04
Loss = 2.5638e-03, PNorm = 154.2052, GNorm = 0.0736, lr_0 = 1.6089e-04
Loss = 1.6558e-03, PNorm = 154.2068, GNorm = 0.1865, lr_0 = 1.6078e-04
Loss = 3.9169e-03, PNorm = 154.2062, GNorm = 0.0827, lr_0 = 1.6067e-04
Loss = 2.0291e-03, PNorm = 154.2110, GNorm = 0.0627, lr_0 = 1.6056e-04
Loss = 1.6602e-03, PNorm = 154.2140, GNorm = 0.1204, lr_0 = 1.6045e-04
Loss = 9.2345e-04, PNorm = 154.2169, GNorm = 0.0607, lr_0 = 1.6034e-04
Loss = 2.0003e-03, PNorm = 154.2197, GNorm = 0.1036, lr_0 = 1.6023e-04
Loss = 1.0077e-03, PNorm = 154.2234, GNorm = 0.1261, lr_0 = 1.6012e-04
Loss = 9.8060e-04, PNorm = 154.2265, GNorm = 0.0611, lr_0 = 1.6001e-04
Loss = 2.0031e-03, PNorm = 154.2295, GNorm = 0.0979, lr_0 = 1.5990e-04
Loss = 2.0326e-03, PNorm = 154.2304, GNorm = 0.1368, lr_0 = 1.5979e-04
Loss = 1.3420e-03, PNorm = 154.2345, GNorm = 0.0540, lr_0 = 1.5968e-04
Loss = 2.0437e-03, PNorm = 154.2380, GNorm = 0.1685, lr_0 = 1.5957e-04
Loss = 1.6799e-03, PNorm = 154.2415, GNorm = 0.0967, lr_0 = 1.5946e-04
Loss = 1.3639e-03, PNorm = 154.2437, GNorm = 0.0981, lr_0 = 1.5935e-04
Loss = 1.2623e-03, PNorm = 154.2471, GNorm = 0.0709, lr_0 = 1.5924e-04
Loss = 2.7876e-03, PNorm = 154.2505, GNorm = 0.2182, lr_0 = 1.5913e-04
Loss = 2.3439e-03, PNorm = 154.2542, GNorm = 0.1988, lr_0 = 1.5902e-04
Loss = 2.5299e-03, PNorm = 154.2564, GNorm = 0.2043, lr_0 = 1.5891e-04
Loss = 3.1089e-03, PNorm = 154.2587, GNorm = 0.0646, lr_0 = 1.5880e-04
Loss = 1.3671e-03, PNorm = 154.2629, GNorm = 0.0842, lr_0 = 1.5870e-04
Loss = 1.1256e-03, PNorm = 154.2658, GNorm = 0.0523, lr_0 = 1.5859e-04
Loss = 1.5158e-03, PNorm = 154.2686, GNorm = 0.0346, lr_0 = 1.5848e-04
Loss = 2.9212e-03, PNorm = 154.2724, GNorm = 0.2241, lr_0 = 1.5837e-04
Loss = 1.3229e-03, PNorm = 154.2743, GNorm = 0.1664, lr_0 = 1.5826e-04
Loss = 1.6329e-03, PNorm = 154.2752, GNorm = 0.0577, lr_0 = 1.5815e-04
Loss = 1.8690e-03, PNorm = 154.2781, GNorm = 0.1110, lr_0 = 1.5804e-04
Loss = 1.2136e-03, PNorm = 154.2797, GNorm = 0.1594, lr_0 = 1.5794e-04
Loss = 8.3020e-04, PNorm = 154.2809, GNorm = 0.0588, lr_0 = 1.5783e-04
Loss = 1.0499e-03, PNorm = 154.2841, GNorm = 0.1402, lr_0 = 1.5772e-04
Loss = 8.7797e-04, PNorm = 154.2869, GNorm = 0.0268, lr_0 = 1.5761e-04
Loss = 1.1007e-03, PNorm = 154.2904, GNorm = 0.1775, lr_0 = 1.5750e-04
Loss = 2.4733e-03, PNorm = 154.2936, GNorm = 0.2450, lr_0 = 1.5740e-04
Loss = 1.6475e-03, PNorm = 154.2971, GNorm = 0.0518, lr_0 = 1.5729e-04
Loss = 9.8295e-04, PNorm = 154.2996, GNorm = 0.1789, lr_0 = 1.5718e-04
Loss = 2.0407e-03, PNorm = 154.3029, GNorm = 0.0457, lr_0 = 1.5707e-04
Loss = 1.2700e-03, PNorm = 154.3051, GNorm = 0.1435, lr_0 = 1.5697e-04
Loss = 8.0448e-04, PNorm = 154.3081, GNorm = 0.0220, lr_0 = 1.5686e-04
Loss = 1.4620e-03, PNorm = 154.3089, GNorm = 0.0702, lr_0 = 1.5675e-04
Loss = 1.1080e-03, PNorm = 154.3101, GNorm = 0.1744, lr_0 = 1.5664e-04
Loss = 2.0279e-03, PNorm = 154.3111, GNorm = 0.4278, lr_0 = 1.5654e-04
Loss = 1.3853e-03, PNorm = 154.3139, GNorm = 0.1771, lr_0 = 1.5643e-04
Loss = 1.3270e-03, PNorm = 154.3157, GNorm = 0.1045, lr_0 = 1.5632e-04
Loss = 9.1474e-04, PNorm = 154.3164, GNorm = 0.0323, lr_0 = 1.5621e-04
Loss = 1.3608e-03, PNorm = 154.3173, GNorm = 0.0311, lr_0 = 1.5611e-04
Loss = 9.0409e-04, PNorm = 154.3187, GNorm = 0.0863, lr_0 = 1.5600e-04
Loss = 1.2488e-03, PNorm = 154.3203, GNorm = 0.0523, lr_0 = 1.5589e-04
Loss = 1.4639e-03, PNorm = 154.3219, GNorm = 0.3590, lr_0 = 1.5579e-04
Loss = 1.7132e-03, PNorm = 154.3238, GNorm = 0.0507, lr_0 = 1.5568e-04
Loss = 1.3152e-03, PNorm = 154.3255, GNorm = 0.1076, lr_0 = 1.5557e-04
Loss = 1.4041e-03, PNorm = 154.3274, GNorm = 0.1034, lr_0 = 1.5547e-04
Loss = 1.1149e-03, PNorm = 154.3292, GNorm = 0.0529, lr_0 = 1.5536e-04
Loss = 1.7551e-03, PNorm = 154.3298, GNorm = 0.0851, lr_0 = 1.5525e-04
Loss = 1.2903e-03, PNorm = 154.3302, GNorm = 0.0553, lr_0 = 1.5515e-04
Loss = 2.3874e-03, PNorm = 154.3319, GNorm = 0.0903, lr_0 = 1.5504e-04
Loss = 1.0163e-03, PNorm = 154.3346, GNorm = 0.1495, lr_0 = 1.5493e-04
Loss = 2.1724e-03, PNorm = 154.3380, GNorm = 0.1320, lr_0 = 1.5483e-04
Loss = 2.9459e-03, PNorm = 154.3425, GNorm = 0.1847, lr_0 = 1.5472e-04
Loss = 1.8829e-03, PNorm = 154.3460, GNorm = 0.2431, lr_0 = 1.5462e-04
Loss = 1.1340e-03, PNorm = 154.3488, GNorm = 0.1398, lr_0 = 1.5451e-04
Loss = 3.2442e-03, PNorm = 154.3512, GNorm = 0.1566, lr_0 = 1.5440e-04
Loss = 1.2204e-03, PNorm = 154.3550, GNorm = 0.1398, lr_0 = 1.5430e-04
Loss = 1.3738e-03, PNorm = 154.3576, GNorm = 0.1232, lr_0 = 1.5419e-04
Loss = 1.3696e-03, PNorm = 154.3609, GNorm = 0.0703, lr_0 = 1.5409e-04
Loss = 1.0445e-03, PNorm = 154.3625, GNorm = 0.0515, lr_0 = 1.5398e-04
Loss = 2.0818e-03, PNorm = 154.3631, GNorm = 0.1308, lr_0 = 1.5388e-04
Loss = 4.2162e-03, PNorm = 154.3683, GNorm = 0.1304, lr_0 = 1.5377e-04
Loss = 1.5853e-03, PNorm = 154.3709, GNorm = 0.2970, lr_0 = 1.5367e-04
Loss = 2.5004e-03, PNorm = 154.3725, GNorm = 0.1764, lr_0 = 1.5356e-04
Loss = 1.1038e-03, PNorm = 154.3749, GNorm = 0.0530, lr_0 = 1.5346e-04
Loss = 9.3047e-04, PNorm = 154.3779, GNorm = 0.2088, lr_0 = 1.5335e-04
Loss = 1.0378e-03, PNorm = 154.3804, GNorm = 0.0755, lr_0 = 1.5325e-04
Loss = 3.3207e-03, PNorm = 154.3821, GNorm = 0.1023, lr_0 = 1.5314e-04
Loss = 8.5934e-04, PNorm = 154.3856, GNorm = 0.0798, lr_0 = 1.5304e-04
Loss = 1.1648e-03, PNorm = 154.3863, GNorm = 0.1504, lr_0 = 1.5293e-04
Loss = 1.6613e-03, PNorm = 154.3875, GNorm = 0.0835, lr_0 = 1.5283e-04
Loss = 8.8775e-04, PNorm = 154.3906, GNorm = 0.0722, lr_0 = 1.5272e-04
Loss = 1.0958e-03, PNorm = 154.3936, GNorm = 0.0317, lr_0 = 1.5262e-04
Loss = 3.1937e-03, PNorm = 154.3960, GNorm = 0.1369, lr_0 = 1.5251e-04
Loss = 2.3823e-03, PNorm = 154.3986, GNorm = 0.1152, lr_0 = 1.5241e-04
Loss = 2.6850e-03, PNorm = 154.4019, GNorm = 0.1521, lr_0 = 1.5230e-04
Loss = 1.6140e-03, PNorm = 154.4048, GNorm = 0.0621, lr_0 = 1.5220e-04
Loss = 1.9799e-03, PNorm = 154.4076, GNorm = 0.1196, lr_0 = 1.5209e-04
Loss = 1.6115e-03, PNorm = 154.4113, GNorm = 0.1261, lr_0 = 1.5199e-04
Loss = 1.2021e-03, PNorm = 154.4141, GNorm = 0.0677, lr_0 = 1.5189e-04
Loss = 1.2701e-03, PNorm = 154.4175, GNorm = 0.0945, lr_0 = 1.5178e-04
Loss = 1.7497e-03, PNorm = 154.4210, GNorm = 0.1025, lr_0 = 1.5168e-04
Loss = 9.9159e-04, PNorm = 154.4252, GNorm = 0.1360, lr_0 = 1.5157e-04
Loss = 1.1516e-03, PNorm = 154.4280, GNorm = 0.0583, lr_0 = 1.5147e-04
Loss = 5.7595e-03, PNorm = 154.4271, GNorm = 0.1674, lr_0 = 1.5137e-04
Loss = 1.3724e-03, PNorm = 154.4298, GNorm = 0.1242, lr_0 = 1.5126e-04
Loss = 9.4389e-04, PNorm = 154.4311, GNorm = 0.1118, lr_0 = 1.5116e-04
Loss = 9.7944e-04, PNorm = 154.4334, GNorm = 0.1100, lr_0 = 1.5106e-04
Loss = 1.7778e-03, PNorm = 154.4350, GNorm = 0.0415, lr_0 = 1.5095e-04
Loss = 1.1299e-03, PNorm = 154.4382, GNorm = 0.1553, lr_0 = 1.5085e-04
Validation mae = 0.475320
Epoch 25
Loss = 1.6827e-03, PNorm = 154.4398, GNorm = 0.1211, lr_0 = 1.5075e-04
Loss = 2.5019e-03, PNorm = 154.4415, GNorm = 0.0687, lr_0 = 1.5064e-04
Loss = 1.2342e-03, PNorm = 154.4422, GNorm = 0.0611, lr_0 = 1.5054e-04
Loss = 8.4916e-04, PNorm = 154.4450, GNorm = 0.2042, lr_0 = 1.5044e-04
Loss = 1.1406e-03, PNorm = 154.4464, GNorm = 0.1119, lr_0 = 1.5033e-04
Loss = 9.0043e-04, PNorm = 154.4475, GNorm = 0.0641, lr_0 = 1.5023e-04
Loss = 1.6604e-03, PNorm = 154.4502, GNorm = 0.0975, lr_0 = 1.5013e-04
Loss = 8.4108e-04, PNorm = 154.4516, GNorm = 0.0696, lr_0 = 1.5002e-04
Loss = 8.9848e-04, PNorm = 154.4544, GNorm = 0.2259, lr_0 = 1.4992e-04
Loss = 1.7956e-03, PNorm = 154.4573, GNorm = 0.3554, lr_0 = 1.4982e-04
Loss = 1.7966e-03, PNorm = 154.4621, GNorm = 0.1435, lr_0 = 1.4972e-04
Loss = 2.2707e-03, PNorm = 154.4640, GNorm = 0.1270, lr_0 = 1.4961e-04
Loss = 1.5943e-03, PNorm = 154.4661, GNorm = 0.2727, lr_0 = 1.4951e-04
Loss = 1.2821e-03, PNorm = 154.4697, GNorm = 0.1494, lr_0 = 1.4941e-04
Loss = 1.2148e-03, PNorm = 154.4719, GNorm = 0.0890, lr_0 = 1.4931e-04
Loss = 1.2153e-03, PNorm = 154.4729, GNorm = 0.0683, lr_0 = 1.4920e-04
Loss = 1.6948e-03, PNorm = 154.4742, GNorm = 0.3645, lr_0 = 1.4910e-04
Loss = 9.1549e-04, PNorm = 154.4758, GNorm = 0.0464, lr_0 = 1.4900e-04
Loss = 1.6326e-03, PNorm = 154.4782, GNorm = 0.0214, lr_0 = 1.4890e-04
Loss = 9.0045e-04, PNorm = 154.4799, GNorm = 0.0332, lr_0 = 1.4880e-04
Loss = 2.2074e-03, PNorm = 154.4816, GNorm = 0.1059, lr_0 = 1.4869e-04
Loss = 1.3666e-03, PNorm = 154.4821, GNorm = 0.1794, lr_0 = 1.4859e-04
Loss = 1.2373e-03, PNorm = 154.4842, GNorm = 0.0880, lr_0 = 1.4849e-04
Loss = 1.1398e-03, PNorm = 154.4861, GNorm = 0.0875, lr_0 = 1.4839e-04
Loss = 1.6358e-03, PNorm = 154.4879, GNorm = 0.1350, lr_0 = 1.4829e-04
Loss = 1.8204e-03, PNorm = 154.4907, GNorm = 0.0501, lr_0 = 1.4818e-04
Loss = 1.1411e-03, PNorm = 154.4927, GNorm = 0.2330, lr_0 = 1.4808e-04
Loss = 1.7797e-03, PNorm = 154.4933, GNorm = 0.1358, lr_0 = 1.4798e-04
Loss = 7.7027e-04, PNorm = 154.4956, GNorm = 0.0732, lr_0 = 1.4788e-04
Loss = 1.0243e-03, PNorm = 154.4987, GNorm = 0.0373, lr_0 = 1.4778e-04
Loss = 8.3192e-04, PNorm = 154.5027, GNorm = 0.0386, lr_0 = 1.4768e-04
Loss = 1.1165e-03, PNorm = 154.5039, GNorm = 0.0819, lr_0 = 1.4758e-04
Loss = 1.6214e-03, PNorm = 154.5063, GNorm = 0.0726, lr_0 = 1.4748e-04
Loss = 1.8492e-03, PNorm = 154.5086, GNorm = 0.1029, lr_0 = 1.4737e-04
Loss = 1.8859e-03, PNorm = 154.5111, GNorm = 0.0693, lr_0 = 1.4727e-04
Loss = 1.6388e-03, PNorm = 154.5120, GNorm = 0.1131, lr_0 = 1.4717e-04
Loss = 2.7098e-03, PNorm = 154.5155, GNorm = 0.0753, lr_0 = 1.4707e-04
Loss = 9.2009e-04, PNorm = 154.5185, GNorm = 0.0941, lr_0 = 1.4697e-04
Loss = 9.1475e-04, PNorm = 154.5205, GNorm = 0.0362, lr_0 = 1.4687e-04
Loss = 4.1063e-03, PNorm = 154.5219, GNorm = 0.1554, lr_0 = 1.4677e-04
Loss = 1.5914e-03, PNorm = 154.5232, GNorm = 0.1181, lr_0 = 1.4667e-04
Loss = 7.9406e-04, PNorm = 154.5254, GNorm = 0.0601, lr_0 = 1.4657e-04
Loss = 1.7056e-03, PNorm = 154.5293, GNorm = 0.1368, lr_0 = 1.4647e-04
Loss = 1.6646e-03, PNorm = 154.5323, GNorm = 0.0830, lr_0 = 1.4637e-04
Loss = 7.9227e-04, PNorm = 154.5343, GNorm = 0.1026, lr_0 = 1.4627e-04
Loss = 1.3606e-03, PNorm = 154.5361, GNorm = 0.0989, lr_0 = 1.4617e-04
Loss = 9.5457e-04, PNorm = 154.5377, GNorm = 0.1756, lr_0 = 1.4607e-04
Loss = 1.2939e-03, PNorm = 154.5379, GNorm = 0.1134, lr_0 = 1.4597e-04
Loss = 9.8458e-04, PNorm = 154.5396, GNorm = 0.1835, lr_0 = 1.4587e-04
Loss = 1.8102e-03, PNorm = 154.5412, GNorm = 0.0680, lr_0 = 1.4577e-04
Loss = 1.8747e-03, PNorm = 154.5440, GNorm = 0.2507, lr_0 = 1.4567e-04
Loss = 1.9597e-03, PNorm = 154.5466, GNorm = 0.2805, lr_0 = 1.4557e-04
Loss = 1.6399e-03, PNorm = 154.5502, GNorm = 0.2861, lr_0 = 1.4547e-04
Loss = 9.9602e-04, PNorm = 154.5541, GNorm = 0.1559, lr_0 = 1.4537e-04
Loss = 1.0405e-03, PNorm = 154.5573, GNorm = 0.1723, lr_0 = 1.4527e-04
Loss = 1.3272e-03, PNorm = 154.5584, GNorm = 0.1016, lr_0 = 1.4517e-04
Loss = 3.0510e-03, PNorm = 154.5585, GNorm = 0.0757, lr_0 = 1.4507e-04
Loss = 1.1179e-03, PNorm = 154.5608, GNorm = 0.0953, lr_0 = 1.4497e-04
Loss = 5.3789e-03, PNorm = 154.5635, GNorm = 0.2850, lr_0 = 1.4487e-04
Loss = 1.0433e-03, PNorm = 154.5671, GNorm = 0.1687, lr_0 = 1.4477e-04
Loss = 1.2634e-03, PNorm = 154.5722, GNorm = 0.2083, lr_0 = 1.4467e-04
Loss = 1.7112e-03, PNorm = 154.5739, GNorm = 0.1087, lr_0 = 1.4457e-04
Loss = 2.4986e-03, PNorm = 154.5746, GNorm = 0.0795, lr_0 = 1.4447e-04
Loss = 2.4445e-03, PNorm = 154.5764, GNorm = 0.0544, lr_0 = 1.4438e-04
Loss = 9.1950e-04, PNorm = 154.5788, GNorm = 0.0381, lr_0 = 1.4428e-04
Loss = 1.0543e-03, PNorm = 154.5822, GNorm = 0.1438, lr_0 = 1.4418e-04
Loss = 8.2483e-04, PNorm = 154.5846, GNorm = 0.0668, lr_0 = 1.4408e-04
Loss = 9.3069e-04, PNorm = 154.5855, GNorm = 0.1676, lr_0 = 1.4398e-04
Loss = 1.0322e-03, PNorm = 154.5862, GNorm = 0.0667, lr_0 = 1.4388e-04
Loss = 3.9010e-03, PNorm = 154.5888, GNorm = 0.1832, lr_0 = 1.4378e-04
Loss = 1.6206e-03, PNorm = 154.5891, GNorm = 0.0753, lr_0 = 1.4368e-04
Loss = 8.3979e-04, PNorm = 154.5916, GNorm = 0.1190, lr_0 = 1.4359e-04
Loss = 8.6628e-04, PNorm = 154.5926, GNorm = 0.0556, lr_0 = 1.4349e-04
Loss = 8.3396e-04, PNorm = 154.5937, GNorm = 0.1128, lr_0 = 1.4339e-04
Loss = 2.9951e-03, PNorm = 154.5965, GNorm = 0.2071, lr_0 = 1.4329e-04
Loss = 1.2295e-03, PNorm = 154.5992, GNorm = 0.1369, lr_0 = 1.4319e-04
Loss = 8.6757e-04, PNorm = 154.6022, GNorm = 0.1183, lr_0 = 1.4310e-04
Loss = 1.2571e-03, PNorm = 154.6045, GNorm = 0.1225, lr_0 = 1.4300e-04
Loss = 9.2057e-04, PNorm = 154.6080, GNorm = 0.0260, lr_0 = 1.4290e-04
Loss = 1.0478e-03, PNorm = 154.6120, GNorm = 0.4269, lr_0 = 1.4280e-04
Loss = 2.3427e-03, PNorm = 154.6118, GNorm = 0.2738, lr_0 = 1.4270e-04
Loss = 1.9005e-03, PNorm = 154.6147, GNorm = 0.0994, lr_0 = 1.4261e-04
Loss = 5.2526e-03, PNorm = 154.6177, GNorm = 0.0916, lr_0 = 1.4251e-04
Loss = 2.8880e-03, PNorm = 154.6204, GNorm = 0.0652, lr_0 = 1.4241e-04
Loss = 1.7611e-03, PNorm = 154.6227, GNorm = 0.0504, lr_0 = 1.4231e-04
Loss = 1.9328e-03, PNorm = 154.6231, GNorm = 0.0659, lr_0 = 1.4222e-04
Loss = 1.6316e-03, PNorm = 154.6241, GNorm = 0.1031, lr_0 = 1.4212e-04
Loss = 8.5360e-04, PNorm = 154.6271, GNorm = 0.0648, lr_0 = 1.4202e-04
Loss = 8.2281e-04, PNorm = 154.6286, GNorm = 0.0499, lr_0 = 1.4192e-04
Loss = 1.3365e-03, PNorm = 154.6315, GNorm = 0.1512, lr_0 = 1.4183e-04
Loss = 1.4746e-03, PNorm = 154.6318, GNorm = 0.3095, lr_0 = 1.4173e-04
Loss = 8.2428e-04, PNorm = 154.6326, GNorm = 0.1301, lr_0 = 1.4163e-04
Loss = 1.1475e-03, PNorm = 154.6334, GNorm = 0.1550, lr_0 = 1.4153e-04
Loss = 1.0985e-03, PNorm = 154.6359, GNorm = 0.2097, lr_0 = 1.4144e-04
Loss = 1.1106e-03, PNorm = 154.6386, GNorm = 0.0571, lr_0 = 1.4134e-04
Loss = 2.1664e-03, PNorm = 154.6425, GNorm = 0.1559, lr_0 = 1.4124e-04
Loss = 8.9157e-04, PNorm = 154.6447, GNorm = 0.1949, lr_0 = 1.4115e-04
Loss = 2.2811e-03, PNorm = 154.6461, GNorm = 0.3071, lr_0 = 1.4105e-04
Loss = 1.5262e-03, PNorm = 154.6480, GNorm = 0.0544, lr_0 = 1.4095e-04
Loss = 2.0398e-03, PNorm = 154.6498, GNorm = 0.0541, lr_0 = 1.4086e-04
Loss = 1.0094e-03, PNorm = 154.6509, GNorm = 0.1091, lr_0 = 1.4076e-04
Loss = 2.3965e-03, PNorm = 154.6526, GNorm = 0.2446, lr_0 = 1.4066e-04
Loss = 8.0807e-04, PNorm = 154.6551, GNorm = 0.0311, lr_0 = 1.4057e-04
Loss = 1.7730e-03, PNorm = 154.6584, GNorm = 0.0667, lr_0 = 1.4047e-04
Loss = 1.5014e-03, PNorm = 154.6597, GNorm = 0.0519, lr_0 = 1.4038e-04
Loss = 1.0218e-03, PNorm = 154.6628, GNorm = 0.1438, lr_0 = 1.4028e-04
Loss = 9.6552e-04, PNorm = 154.6632, GNorm = 0.0663, lr_0 = 1.4018e-04
Loss = 1.7953e-03, PNorm = 154.6651, GNorm = 0.0887, lr_0 = 1.4009e-04
Loss = 8.7445e-04, PNorm = 154.6667, GNorm = 0.0554, lr_0 = 1.3999e-04
Loss = 8.4259e-04, PNorm = 154.6685, GNorm = 0.0970, lr_0 = 1.3990e-04
Loss = 2.7248e-03, PNorm = 154.6708, GNorm = 0.0956, lr_0 = 1.3980e-04
Loss = 3.5132e-03, PNorm = 154.6739, GNorm = 0.2795, lr_0 = 1.3970e-04
Loss = 1.3509e-03, PNorm = 154.6757, GNorm = 0.0597, lr_0 = 1.3961e-04
Loss = 1.0764e-03, PNorm = 154.6775, GNorm = 0.1267, lr_0 = 1.3951e-04
Loss = 1.3366e-03, PNorm = 154.6791, GNorm = 0.1714, lr_0 = 1.3942e-04
Loss = 1.1555e-03, PNorm = 154.6801, GNorm = 0.0795, lr_0 = 1.3932e-04
Loss = 2.2247e-03, PNorm = 154.6834, GNorm = 0.0529, lr_0 = 1.3923e-04
Loss = 7.3597e-04, PNorm = 154.6863, GNorm = 0.0419, lr_0 = 1.3913e-04
Loss = 1.6861e-03, PNorm = 154.6874, GNorm = 0.0646, lr_0 = 1.3904e-04
Loss = 9.6438e-04, PNorm = 154.6899, GNorm = 0.0576, lr_0 = 1.3894e-04
Validation mae = 0.474796
Epoch 26
Loss = 9.2337e-04, PNorm = 154.6926, GNorm = 0.0575, lr_0 = 1.3884e-04
Loss = 8.5756e-04, PNorm = 154.6950, GNorm = 0.0819, lr_0 = 1.3875e-04
Loss = 2.1379e-03, PNorm = 154.6943, GNorm = 0.1680, lr_0 = 1.3865e-04
Loss = 1.2748e-03, PNorm = 154.6966, GNorm = 0.2163, lr_0 = 1.3856e-04
Loss = 7.6613e-04, PNorm = 154.7004, GNorm = 0.0695, lr_0 = 1.3846e-04
Loss = 9.6144e-04, PNorm = 154.7033, GNorm = 0.0646, lr_0 = 1.3837e-04
Loss = 1.5402e-03, PNorm = 154.7054, GNorm = 0.0466, lr_0 = 1.3828e-04
Loss = 3.2904e-03, PNorm = 154.7072, GNorm = 0.0615, lr_0 = 1.3818e-04
Loss = 4.8895e-03, PNorm = 154.7080, GNorm = 0.8953, lr_0 = 1.3809e-04
Loss = 7.7461e-04, PNorm = 154.7111, GNorm = 0.0323, lr_0 = 1.3799e-04
Loss = 1.0952e-03, PNorm = 154.7124, GNorm = 0.1295, lr_0 = 1.3790e-04
Loss = 1.4111e-03, PNorm = 154.7144, GNorm = 0.0508, lr_0 = 1.3780e-04
Loss = 2.8127e-03, PNorm = 154.7171, GNorm = 0.6649, lr_0 = 1.3771e-04
Loss = 9.4945e-04, PNorm = 154.7189, GNorm = 0.1426, lr_0 = 1.3761e-04
Loss = 1.0924e-03, PNorm = 154.7206, GNorm = 0.0823, lr_0 = 1.3752e-04
Loss = 6.7059e-04, PNorm = 154.7214, GNorm = 0.1446, lr_0 = 1.3742e-04
Loss = 7.0234e-04, PNorm = 154.7215, GNorm = 0.0823, lr_0 = 1.3733e-04
Loss = 1.5966e-03, PNorm = 154.7225, GNorm = 0.1255, lr_0 = 1.3724e-04
Loss = 8.3928e-04, PNorm = 154.7241, GNorm = 0.0878, lr_0 = 1.3714e-04
Loss = 1.6256e-03, PNorm = 154.7254, GNorm = 0.0609, lr_0 = 1.3705e-04
Loss = 1.3405e-03, PNorm = 154.7259, GNorm = 0.1151, lr_0 = 1.3695e-04
Loss = 2.1149e-03, PNorm = 154.7248, GNorm = 0.1102, lr_0 = 1.3686e-04
Loss = 6.3558e-04, PNorm = 154.7243, GNorm = 0.1514, lr_0 = 1.3677e-04
Loss = 8.6866e-04, PNorm = 154.7246, GNorm = 0.1339, lr_0 = 1.3667e-04
Loss = 8.0924e-04, PNorm = 154.7254, GNorm = 0.0411, lr_0 = 1.3658e-04
Loss = 1.6334e-03, PNorm = 154.7278, GNorm = 0.0635, lr_0 = 1.3649e-04
Loss = 1.5427e-03, PNorm = 154.7289, GNorm = 0.0711, lr_0 = 1.3639e-04
Loss = 1.3347e-03, PNorm = 154.7307, GNorm = 0.0340, lr_0 = 1.3630e-04
Loss = 8.6988e-04, PNorm = 154.7330, GNorm = 0.0864, lr_0 = 1.3621e-04
Loss = 8.5928e-04, PNorm = 154.7352, GNorm = 0.1772, lr_0 = 1.3611e-04
Loss = 1.8056e-03, PNorm = 154.7375, GNorm = 0.0969, lr_0 = 1.3602e-04
Loss = 1.2236e-03, PNorm = 154.7385, GNorm = 0.0429, lr_0 = 1.3593e-04
Loss = 1.3477e-03, PNorm = 154.7407, GNorm = 0.0398, lr_0 = 1.3583e-04
Loss = 1.2276e-03, PNorm = 154.7420, GNorm = 0.0829, lr_0 = 1.3574e-04
Loss = 1.3218e-03, PNorm = 154.7430, GNorm = 0.1003, lr_0 = 1.3565e-04
Loss = 6.9965e-04, PNorm = 154.7442, GNorm = 0.1564, lr_0 = 1.3555e-04
Loss = 6.3662e-04, PNorm = 154.7476, GNorm = 0.0457, lr_0 = 1.3546e-04
Loss = 7.0133e-04, PNorm = 154.7481, GNorm = 0.0670, lr_0 = 1.3537e-04
Loss = 6.9605e-04, PNorm = 154.7484, GNorm = 0.0347, lr_0 = 1.3528e-04
Loss = 9.1050e-04, PNorm = 154.7490, GNorm = 0.0631, lr_0 = 1.3518e-04
Loss = 9.4635e-04, PNorm = 154.7497, GNorm = 0.0794, lr_0 = 1.3509e-04
Loss = 1.0743e-03, PNorm = 154.7503, GNorm = 0.0768, lr_0 = 1.3500e-04
Loss = 1.2900e-03, PNorm = 154.7517, GNorm = 0.0790, lr_0 = 1.3491e-04
Loss = 2.1731e-03, PNorm = 154.7534, GNorm = 0.0373, lr_0 = 1.3481e-04
Loss = 1.5835e-03, PNorm = 154.7549, GNorm = 0.0905, lr_0 = 1.3472e-04
Loss = 1.7051e-03, PNorm = 154.7576, GNorm = 0.1072, lr_0 = 1.3463e-04
Loss = 7.3834e-04, PNorm = 154.7604, GNorm = 0.0298, lr_0 = 1.3454e-04
Loss = 1.2298e-03, PNorm = 154.7625, GNorm = 0.0490, lr_0 = 1.3444e-04
Loss = 1.2979e-03, PNorm = 154.7622, GNorm = 0.0719, lr_0 = 1.3435e-04
Loss = 1.1738e-03, PNorm = 154.7635, GNorm = 0.1722, lr_0 = 1.3426e-04
Loss = 7.5574e-04, PNorm = 154.7656, GNorm = 0.0208, lr_0 = 1.3417e-04
Loss = 1.1999e-03, PNorm = 154.7681, GNorm = 0.0605, lr_0 = 1.3408e-04
Loss = 1.9700e-03, PNorm = 154.7724, GNorm = 0.1552, lr_0 = 1.3398e-04
Loss = 2.7495e-03, PNorm = 154.7729, GNorm = 0.1115, lr_0 = 1.3389e-04
Loss = 7.2551e-04, PNorm = 154.7733, GNorm = 0.1091, lr_0 = 1.3380e-04
Loss = 6.7406e-04, PNorm = 154.7754, GNorm = 0.0991, lr_0 = 1.3371e-04
Loss = 9.6885e-04, PNorm = 154.7788, GNorm = 0.0207, lr_0 = 1.3362e-04
Loss = 1.6740e-03, PNorm = 154.7800, GNorm = 0.4818, lr_0 = 1.3353e-04
Loss = 6.8498e-04, PNorm = 154.7817, GNorm = 0.0654, lr_0 = 1.3343e-04
Loss = 1.0081e-03, PNorm = 154.7830, GNorm = 0.1382, lr_0 = 1.3334e-04
Loss = 8.1852e-04, PNorm = 154.7839, GNorm = 0.0876, lr_0 = 1.3325e-04
Loss = 2.0079e-03, PNorm = 154.7859, GNorm = 0.0323, lr_0 = 1.3316e-04
Loss = 4.5684e-03, PNorm = 154.7879, GNorm = 0.1336, lr_0 = 1.3307e-04
Loss = 8.6854e-04, PNorm = 154.7899, GNorm = 0.0543, lr_0 = 1.3298e-04
Loss = 9.2387e-04, PNorm = 154.7925, GNorm = 0.0389, lr_0 = 1.3289e-04
Loss = 7.3708e-04, PNorm = 154.7946, GNorm = 0.0720, lr_0 = 1.3280e-04
Loss = 1.2418e-03, PNorm = 154.7960, GNorm = 0.0475, lr_0 = 1.3270e-04
Loss = 1.1190e-03, PNorm = 154.7975, GNorm = 0.1602, lr_0 = 1.3261e-04
Loss = 1.7937e-03, PNorm = 154.7986, GNorm = 0.0772, lr_0 = 1.3252e-04
Loss = 1.0158e-03, PNorm = 154.7980, GNorm = 0.2239, lr_0 = 1.3243e-04
Loss = 1.9201e-03, PNorm = 154.7994, GNorm = 0.0504, lr_0 = 1.3234e-04
Loss = 1.3271e-03, PNorm = 154.7997, GNorm = 0.0780, lr_0 = 1.3225e-04
Loss = 9.4179e-04, PNorm = 154.8026, GNorm = 0.1060, lr_0 = 1.3216e-04
Loss = 8.3653e-04, PNorm = 154.8046, GNorm = 0.0527, lr_0 = 1.3207e-04
Loss = 1.6949e-03, PNorm = 154.8073, GNorm = 0.0503, lr_0 = 1.3198e-04
Loss = 2.4336e-03, PNorm = 154.8089, GNorm = 0.1074, lr_0 = 1.3189e-04
Loss = 8.4888e-04, PNorm = 154.8102, GNorm = 0.0999, lr_0 = 1.3180e-04
Loss = 2.3727e-03, PNorm = 154.8139, GNorm = 0.0832, lr_0 = 1.3171e-04
Loss = 2.3228e-03, PNorm = 154.8146, GNorm = 0.1820, lr_0 = 1.3162e-04
Loss = 1.3292e-03, PNorm = 154.8168, GNorm = 0.0689, lr_0 = 1.3153e-04
Loss = 8.1930e-04, PNorm = 154.8184, GNorm = 0.0873, lr_0 = 1.3144e-04
Loss = 2.6396e-03, PNorm = 154.8200, GNorm = 0.0878, lr_0 = 1.3135e-04
Loss = 7.7188e-04, PNorm = 154.8229, GNorm = 0.2902, lr_0 = 1.3126e-04
Loss = 7.1013e-04, PNorm = 154.8250, GNorm = 0.1091, lr_0 = 1.3117e-04
Loss = 2.3299e-03, PNorm = 154.8275, GNorm = 0.0989, lr_0 = 1.3108e-04
Loss = 2.7115e-03, PNorm = 154.8294, GNorm = 0.2179, lr_0 = 1.3099e-04
Loss = 1.3525e-03, PNorm = 154.8320, GNorm = 0.1295, lr_0 = 1.3090e-04
Loss = 9.5370e-04, PNorm = 154.8333, GNorm = 0.1910, lr_0 = 1.3081e-04
Loss = 6.7902e-04, PNorm = 154.8344, GNorm = 0.0354, lr_0 = 1.3072e-04
Loss = 8.3481e-04, PNorm = 154.8384, GNorm = 0.2229, lr_0 = 1.3063e-04
Loss = 7.2781e-04, PNorm = 154.8391, GNorm = 0.0645, lr_0 = 1.3054e-04
Loss = 2.2282e-03, PNorm = 154.8400, GNorm = 0.1305, lr_0 = 1.3045e-04
Loss = 1.6765e-03, PNorm = 154.8412, GNorm = 0.0910, lr_0 = 1.3036e-04
Loss = 1.8988e-03, PNorm = 154.8424, GNorm = 0.1359, lr_0 = 1.3027e-04
Loss = 1.0598e-03, PNorm = 154.8449, GNorm = 0.1427, lr_0 = 1.3018e-04
Loss = 1.2723e-03, PNorm = 154.8475, GNorm = 0.0397, lr_0 = 1.3009e-04
Loss = 1.1065e-03, PNorm = 154.8493, GNorm = 0.0808, lr_0 = 1.3000e-04
Loss = 1.5682e-03, PNorm = 154.8530, GNorm = 0.0930, lr_0 = 1.2992e-04
Loss = 1.5344e-03, PNorm = 154.8562, GNorm = 0.1074, lr_0 = 1.2983e-04
Loss = 8.9353e-04, PNorm = 154.8583, GNorm = 0.0316, lr_0 = 1.2974e-04
Loss = 8.1432e-04, PNorm = 154.8593, GNorm = 0.3312, lr_0 = 1.2965e-04
Loss = 1.5447e-03, PNorm = 154.8596, GNorm = 0.0999, lr_0 = 1.2956e-04
Loss = 2.9404e-03, PNorm = 154.8635, GNorm = 0.1946, lr_0 = 1.2947e-04
Loss = 9.2294e-04, PNorm = 154.8673, GNorm = 0.0966, lr_0 = 1.2938e-04
Loss = 1.4166e-03, PNorm = 154.8697, GNorm = 0.0699, lr_0 = 1.2929e-04
Loss = 6.8585e-04, PNorm = 154.8714, GNorm = 0.1008, lr_0 = 1.2921e-04
Loss = 1.6873e-03, PNorm = 154.8732, GNorm = 0.0900, lr_0 = 1.2912e-04
Loss = 1.7613e-03, PNorm = 154.8764, GNorm = 0.0916, lr_0 = 1.2903e-04
Loss = 1.0394e-03, PNorm = 154.8777, GNorm = 0.1841, lr_0 = 1.2894e-04
Loss = 6.4942e-03, PNorm = 154.8802, GNorm = 0.2947, lr_0 = 1.2885e-04
Loss = 1.2819e-03, PNorm = 154.8811, GNorm = 0.0668, lr_0 = 1.2876e-04
Loss = 1.3187e-03, PNorm = 154.8822, GNorm = 0.1580, lr_0 = 1.2867e-04
Loss = 1.2450e-03, PNorm = 154.8835, GNorm = 0.0350, lr_0 = 1.2859e-04
Loss = 7.3803e-04, PNorm = 154.8856, GNorm = 0.1138, lr_0 = 1.2850e-04
Loss = 1.4943e-03, PNorm = 154.8876, GNorm = 0.1671, lr_0 = 1.2841e-04
Loss = 1.9535e-03, PNorm = 154.8907, GNorm = 0.0539, lr_0 = 1.2832e-04
Loss = 1.9661e-03, PNorm = 154.8924, GNorm = 0.1041, lr_0 = 1.2823e-04
Loss = 7.9681e-04, PNorm = 154.8956, GNorm = 0.1065, lr_0 = 1.2815e-04
Loss = 1.0604e-03, PNorm = 154.8974, GNorm = 0.0756, lr_0 = 1.2806e-04
Loss = 1.1634e-03, PNorm = 154.8976, GNorm = 0.0499, lr_0 = 1.2797e-04
Validation mae = 0.475492
Epoch 27
Loss = 1.7348e-03, PNorm = 154.8991, GNorm = 0.2382, lr_0 = 1.2788e-04
Loss = 5.2146e-04, PNorm = 154.9000, GNorm = 0.0403, lr_0 = 1.2780e-04
Loss = 6.1887e-04, PNorm = 154.9007, GNorm = 0.1161, lr_0 = 1.2771e-04
Loss = 7.5304e-04, PNorm = 154.9008, GNorm = 0.0347, lr_0 = 1.2762e-04
Loss = 1.9517e-03, PNorm = 154.9011, GNorm = 0.0574, lr_0 = 1.2753e-04
Loss = 7.1037e-04, PNorm = 154.9031, GNorm = 0.0652, lr_0 = 1.2745e-04
Loss = 1.0276e-03, PNorm = 154.9038, GNorm = 0.1165, lr_0 = 1.2736e-04
Loss = 2.6022e-03, PNorm = 154.9059, GNorm = 0.0608, lr_0 = 1.2727e-04
Loss = 7.4442e-04, PNorm = 154.9067, GNorm = 0.1330, lr_0 = 1.2718e-04
Loss = 1.2211e-03, PNorm = 154.9085, GNorm = 0.0769, lr_0 = 1.2710e-04
Loss = 2.3007e-03, PNorm = 154.9109, GNorm = 0.0591, lr_0 = 1.2701e-04
Loss = 8.9023e-04, PNorm = 154.9138, GNorm = 0.0693, lr_0 = 1.2692e-04
Loss = 5.2933e-04, PNorm = 154.9144, GNorm = 0.1438, lr_0 = 1.2684e-04
Loss = 1.4428e-03, PNorm = 154.9159, GNorm = 0.0510, lr_0 = 1.2675e-04
Loss = 6.3696e-04, PNorm = 154.9168, GNorm = 0.0514, lr_0 = 1.2666e-04
Loss = 2.0501e-03, PNorm = 154.9184, GNorm = 0.0439, lr_0 = 1.2658e-04
Loss = 1.5832e-03, PNorm = 154.9208, GNorm = 0.0921, lr_0 = 1.2649e-04
Loss = 7.2210e-04, PNorm = 154.9226, GNorm = 0.0353, lr_0 = 1.2640e-04
Loss = 9.9174e-04, PNorm = 154.9260, GNorm = 0.0491, lr_0 = 1.2632e-04
Loss = 1.3886e-03, PNorm = 154.9295, GNorm = 0.0856, lr_0 = 1.2623e-04
Loss = 1.9848e-03, PNorm = 154.9296, GNorm = 0.1752, lr_0 = 1.2614e-04
Loss = 1.4180e-03, PNorm = 154.9302, GNorm = 0.0377, lr_0 = 1.2606e-04
Loss = 1.1922e-03, PNorm = 154.9304, GNorm = 0.1328, lr_0 = 1.2597e-04
Loss = 7.3210e-04, PNorm = 154.9325, GNorm = 0.0432, lr_0 = 1.2588e-04
Loss = 2.3829e-03, PNorm = 154.9337, GNorm = 0.1493, lr_0 = 1.2580e-04
Loss = 5.9850e-04, PNorm = 154.9333, GNorm = 0.0546, lr_0 = 1.2571e-04
Loss = 1.6370e-03, PNorm = 154.9350, GNorm = 0.0404, lr_0 = 1.2563e-04
Loss = 1.5405e-03, PNorm = 154.9351, GNorm = 0.0977, lr_0 = 1.2554e-04
Loss = 2.7038e-03, PNorm = 154.9366, GNorm = 0.3450, lr_0 = 1.2545e-04
Loss = 6.9287e-04, PNorm = 154.9384, GNorm = 0.0731, lr_0 = 1.2537e-04
Loss = 1.6698e-03, PNorm = 154.9381, GNorm = 0.1468, lr_0 = 1.2528e-04
Loss = 1.8294e-03, PNorm = 154.9401, GNorm = 0.1506, lr_0 = 1.2520e-04
Loss = 7.0033e-04, PNorm = 154.9403, GNorm = 0.0172, lr_0 = 1.2511e-04
Loss = 1.1891e-03, PNorm = 154.9423, GNorm = 0.0729, lr_0 = 1.2502e-04
Loss = 6.1300e-04, PNorm = 154.9435, GNorm = 0.0410, lr_0 = 1.2494e-04
Loss = 6.3900e-04, PNorm = 154.9449, GNorm = 0.0305, lr_0 = 1.2485e-04
Loss = 1.4346e-03, PNorm = 154.9474, GNorm = 0.0656, lr_0 = 1.2477e-04
Loss = 1.5323e-03, PNorm = 154.9486, GNorm = 0.0749, lr_0 = 1.2468e-04
Loss = 1.1378e-03, PNorm = 154.9500, GNorm = 0.1252, lr_0 = 1.2460e-04
Loss = 7.6230e-04, PNorm = 154.9510, GNorm = 0.1036, lr_0 = 1.2451e-04
Loss = 1.1622e-03, PNorm = 154.9535, GNorm = 0.2731, lr_0 = 1.2443e-04
Loss = 1.0783e-03, PNorm = 154.9563, GNorm = 0.0582, lr_0 = 1.2434e-04
Loss = 1.5831e-03, PNorm = 154.9576, GNorm = 0.1140, lr_0 = 1.2426e-04
Loss = 1.4624e-03, PNorm = 154.9595, GNorm = 0.1207, lr_0 = 1.2417e-04
Loss = 6.3681e-04, PNorm = 154.9601, GNorm = 0.1127, lr_0 = 1.2409e-04
Loss = 1.3998e-03, PNorm = 154.9600, GNorm = 0.0728, lr_0 = 1.2400e-04
Loss = 1.1815e-03, PNorm = 154.9619, GNorm = 0.1551, lr_0 = 1.2392e-04
Loss = 9.5892e-04, PNorm = 154.9630, GNorm = 0.1189, lr_0 = 1.2383e-04
Loss = 5.7961e-04, PNorm = 154.9632, GNorm = 0.0583, lr_0 = 1.2375e-04
Loss = 1.5947e-03, PNorm = 154.9643, GNorm = 0.0896, lr_0 = 1.2366e-04
Loss = 6.6831e-04, PNorm = 154.9663, GNorm = 0.1945, lr_0 = 1.2358e-04
Loss = 1.2324e-03, PNorm = 154.9680, GNorm = 0.0877, lr_0 = 1.2349e-04
Loss = 1.6164e-03, PNorm = 154.9693, GNorm = 0.1104, lr_0 = 1.2341e-04
Loss = 6.3420e-04, PNorm = 154.9724, GNorm = 0.0590, lr_0 = 1.2332e-04
Loss = 4.8763e-04, PNorm = 154.9734, GNorm = 0.0738, lr_0 = 1.2324e-04
Loss = 6.2918e-04, PNorm = 154.9753, GNorm = 0.0188, lr_0 = 1.2315e-04
Loss = 6.1816e-04, PNorm = 154.9762, GNorm = 0.1586, lr_0 = 1.2307e-04
Loss = 1.3517e-03, PNorm = 154.9769, GNorm = 0.0331, lr_0 = 1.2298e-04
Loss = 7.7389e-04, PNorm = 154.9798, GNorm = 0.0655, lr_0 = 1.2290e-04
Loss = 6.5537e-04, PNorm = 154.9804, GNorm = 0.1780, lr_0 = 1.2282e-04
Loss = 5.2796e-04, PNorm = 154.9820, GNorm = 0.0791, lr_0 = 1.2273e-04
Loss = 6.2700e-04, PNorm = 154.9826, GNorm = 0.0420, lr_0 = 1.2265e-04
Loss = 2.7315e-03, PNorm = 154.9844, GNorm = 0.0247, lr_0 = 1.2256e-04
Loss = 1.9414e-03, PNorm = 154.9863, GNorm = 0.0376, lr_0 = 1.2248e-04
Loss = 6.2102e-04, PNorm = 154.9879, GNorm = 0.0361, lr_0 = 1.2240e-04
Loss = 1.0306e-03, PNorm = 154.9898, GNorm = 0.2252, lr_0 = 1.2231e-04
Loss = 1.2552e-03, PNorm = 154.9912, GNorm = 0.1449, lr_0 = 1.2223e-04
Loss = 1.4772e-03, PNorm = 154.9939, GNorm = 0.1250, lr_0 = 1.2214e-04
Loss = 1.7750e-03, PNorm = 154.9939, GNorm = 0.1910, lr_0 = 1.2206e-04
Loss = 1.0504e-03, PNorm = 154.9962, GNorm = 0.1824, lr_0 = 1.2198e-04
Loss = 1.1107e-03, PNorm = 154.9992, GNorm = 0.2348, lr_0 = 1.2189e-04
Loss = 2.2026e-03, PNorm = 155.0024, GNorm = 0.1538, lr_0 = 1.2181e-04
Loss = 6.5737e-04, PNorm = 155.0028, GNorm = 0.0282, lr_0 = 1.2173e-04
Loss = 6.1085e-04, PNorm = 155.0042, GNorm = 0.0986, lr_0 = 1.2164e-04
Loss = 1.2703e-03, PNorm = 155.0062, GNorm = 0.1518, lr_0 = 1.2156e-04
Loss = 5.4452e-04, PNorm = 155.0054, GNorm = 0.0908, lr_0 = 1.2148e-04
Loss = 1.4150e-03, PNorm = 155.0065, GNorm = 0.0687, lr_0 = 1.2139e-04
Loss = 1.0980e-03, PNorm = 155.0087, GNorm = 0.0981, lr_0 = 1.2131e-04
Loss = 3.1286e-03, PNorm = 155.0095, GNorm = 0.0852, lr_0 = 1.2123e-04
Loss = 5.4876e-04, PNorm = 155.0100, GNorm = 0.1224, lr_0 = 1.2114e-04
Loss = 8.1634e-04, PNorm = 155.0122, GNorm = 0.2923, lr_0 = 1.2106e-04
Loss = 7.8712e-04, PNorm = 155.0147, GNorm = 0.0233, lr_0 = 1.2098e-04
Loss = 6.4249e-04, PNorm = 155.0162, GNorm = 0.0530, lr_0 = 1.2090e-04
Loss = 6.2959e-04, PNorm = 155.0176, GNorm = 0.1219, lr_0 = 1.2081e-04
Loss = 1.3562e-03, PNorm = 155.0173, GNorm = 0.0497, lr_0 = 1.2073e-04
Loss = 1.2070e-03, PNorm = 155.0192, GNorm = 0.0773, lr_0 = 1.2065e-04
Loss = 1.0664e-03, PNorm = 155.0207, GNorm = 0.1141, lr_0 = 1.2056e-04
Loss = 2.5677e-03, PNorm = 155.0222, GNorm = 0.0985, lr_0 = 1.2048e-04
Loss = 8.6528e-04, PNorm = 155.0234, GNorm = 0.0868, lr_0 = 1.2040e-04
Loss = 1.2787e-03, PNorm = 155.0252, GNorm = 0.0438, lr_0 = 1.2032e-04
Loss = 1.6137e-03, PNorm = 155.0255, GNorm = 0.1225, lr_0 = 1.2023e-04
Loss = 1.3705e-03, PNorm = 155.0282, GNorm = 0.0510, lr_0 = 1.2015e-04
Loss = 1.2087e-03, PNorm = 155.0279, GNorm = 0.1228, lr_0 = 1.2007e-04
Loss = 1.4001e-03, PNorm = 155.0293, GNorm = 0.2762, lr_0 = 1.1999e-04
Loss = 2.3570e-03, PNorm = 155.0317, GNorm = 0.2011, lr_0 = 1.1991e-04
Loss = 2.0775e-03, PNorm = 155.0334, GNorm = 0.0582, lr_0 = 1.1982e-04
Loss = 6.1563e-04, PNorm = 155.0340, GNorm = 0.1242, lr_0 = 1.1974e-04
Loss = 2.0117e-03, PNorm = 155.0370, GNorm = 0.1986, lr_0 = 1.1966e-04
Loss = 1.4564e-03, PNorm = 155.0382, GNorm = 0.0531, lr_0 = 1.1958e-04
Loss = 6.3200e-04, PNorm = 155.0407, GNorm = 0.0657, lr_0 = 1.1950e-04
Loss = 6.7156e-04, PNorm = 155.0425, GNorm = 0.1042, lr_0 = 1.1941e-04
Loss = 1.5848e-03, PNorm = 155.0435, GNorm = 0.0607, lr_0 = 1.1933e-04
Loss = 1.2374e-03, PNorm = 155.0447, GNorm = 0.0274, lr_0 = 1.1925e-04
Loss = 5.4556e-04, PNorm = 155.0460, GNorm = 0.0882, lr_0 = 1.1917e-04
Loss = 5.7866e-04, PNorm = 155.0476, GNorm = 0.0407, lr_0 = 1.1909e-04
Loss = 8.6536e-04, PNorm = 155.0515, GNorm = 0.0366, lr_0 = 1.1901e-04
Loss = 8.5844e-04, PNorm = 155.0528, GNorm = 0.1342, lr_0 = 1.1892e-04
Loss = 3.3508e-03, PNorm = 155.0540, GNorm = 0.0745, lr_0 = 1.1884e-04
Loss = 7.5374e-04, PNorm = 155.0549, GNorm = 0.0232, lr_0 = 1.1876e-04
Loss = 5.6207e-04, PNorm = 155.0549, GNorm = 0.0913, lr_0 = 1.1868e-04
Loss = 1.4951e-03, PNorm = 155.0567, GNorm = 0.0531, lr_0 = 1.1860e-04
Loss = 4.8592e-04, PNorm = 155.0581, GNorm = 0.0380, lr_0 = 1.1852e-04
Loss = 2.4131e-03, PNorm = 155.0601, GNorm = 0.1130, lr_0 = 1.1844e-04
Loss = 1.4667e-03, PNorm = 155.0602, GNorm = 0.0396, lr_0 = 1.1835e-04
Loss = 4.6138e-03, PNorm = 155.0600, GNorm = 0.1461, lr_0 = 1.1827e-04
Loss = 1.9681e-03, PNorm = 155.0627, GNorm = 0.1354, lr_0 = 1.1819e-04
Loss = 1.6000e-03, PNorm = 155.0645, GNorm = 0.1070, lr_0 = 1.1811e-04
Loss = 3.3297e-03, PNorm = 155.0667, GNorm = 0.1827, lr_0 = 1.1803e-04
Loss = 8.1668e-04, PNorm = 155.0689, GNorm = 0.1209, lr_0 = 1.1795e-04
Loss = 1.3250e-03, PNorm = 155.0695, GNorm = 0.0383, lr_0 = 1.1787e-04
Validation mae = 0.474845
Epoch 28
Loss = 5.2948e-04, PNorm = 155.0701, GNorm = 0.0282, lr_0 = 1.1779e-04
Loss = 8.3475e-04, PNorm = 155.0710, GNorm = 0.0536, lr_0 = 1.1771e-04
Loss = 5.4255e-04, PNorm = 155.0721, GNorm = 0.0625, lr_0 = 1.1763e-04
Loss = 6.7354e-04, PNorm = 155.0730, GNorm = 0.0356, lr_0 = 1.1755e-04
Loss = 8.7816e-04, PNorm = 155.0743, GNorm = 0.0567, lr_0 = 1.1747e-04
Loss = 5.8102e-04, PNorm = 155.0748, GNorm = 0.1168, lr_0 = 1.1739e-04
Loss = 2.8270e-03, PNorm = 155.0775, GNorm = 0.0830, lr_0 = 1.1730e-04
Loss = 5.7119e-04, PNorm = 155.0792, GNorm = 0.2481, lr_0 = 1.1722e-04
Loss = 6.2166e-04, PNorm = 155.0803, GNorm = 0.1224, lr_0 = 1.1714e-04
Loss = 9.5174e-04, PNorm = 155.0821, GNorm = 0.0691, lr_0 = 1.1706e-04
Loss = 1.4506e-03, PNorm = 155.0836, GNorm = 0.0855, lr_0 = 1.1698e-04
Loss = 8.9077e-04, PNorm = 155.0862, GNorm = 0.0235, lr_0 = 1.1690e-04
Loss = 2.0674e-03, PNorm = 155.0878, GNorm = 0.1819, lr_0 = 1.1682e-04
Loss = 1.2867e-03, PNorm = 155.0877, GNorm = 0.0729, lr_0 = 1.1674e-04
Loss = 1.1741e-03, PNorm = 155.0869, GNorm = 0.0904, lr_0 = 1.1666e-04
Loss = 1.6681e-03, PNorm = 155.0882, GNorm = 0.0596, lr_0 = 1.1658e-04
Loss = 4.7600e-04, PNorm = 155.0901, GNorm = 0.0749, lr_0 = 1.1650e-04
Loss = 4.6841e-04, PNorm = 155.0898, GNorm = 0.1012, lr_0 = 1.1642e-04
Loss = 6.8153e-04, PNorm = 155.0916, GNorm = 0.1777, lr_0 = 1.1634e-04
Loss = 2.2828e-03, PNorm = 155.0937, GNorm = 0.0418, lr_0 = 1.1626e-04
Loss = 5.2146e-04, PNorm = 155.0941, GNorm = 0.0526, lr_0 = 1.1618e-04
Loss = 5.6866e-04, PNorm = 155.0934, GNorm = 0.0569, lr_0 = 1.1611e-04
Loss = 3.0874e-03, PNorm = 155.0925, GNorm = 0.1570, lr_0 = 1.1603e-04
Loss = 2.4011e-03, PNorm = 155.0933, GNorm = 0.1852, lr_0 = 1.1595e-04
Loss = 1.1218e-03, PNorm = 155.0950, GNorm = 0.0553, lr_0 = 1.1587e-04
Loss = 9.2848e-04, PNorm = 155.0956, GNorm = 0.0458, lr_0 = 1.1579e-04
Loss = 5.6676e-04, PNorm = 155.0965, GNorm = 0.0495, lr_0 = 1.1571e-04
Loss = 6.7861e-04, PNorm = 155.0976, GNorm = 0.0860, lr_0 = 1.1563e-04
Loss = 1.7894e-03, PNorm = 155.0999, GNorm = 0.1180, lr_0 = 1.1555e-04
Loss = 6.4228e-04, PNorm = 155.1015, GNorm = 0.1307, lr_0 = 1.1547e-04
Loss = 1.2565e-03, PNorm = 155.1027, GNorm = 0.0417, lr_0 = 1.1539e-04
Loss = 6.5939e-04, PNorm = 155.1041, GNorm = 0.0846, lr_0 = 1.1531e-04
Loss = 2.9006e-03, PNorm = 155.1046, GNorm = 0.1483, lr_0 = 1.1523e-04
Loss = 1.0064e-03, PNorm = 155.1050, GNorm = 0.1694, lr_0 = 1.1515e-04
Loss = 8.2831e-04, PNorm = 155.1064, GNorm = 0.0546, lr_0 = 1.1508e-04
Loss = 8.5360e-04, PNorm = 155.1072, GNorm = 0.0553, lr_0 = 1.1500e-04
Loss = 7.3293e-04, PNorm = 155.1077, GNorm = 0.1554, lr_0 = 1.1492e-04
Loss = 9.9251e-04, PNorm = 155.1088, GNorm = 0.0520, lr_0 = 1.1484e-04
Loss = 6.9120e-04, PNorm = 155.1094, GNorm = 0.0314, lr_0 = 1.1476e-04
Loss = 4.1592e-04, PNorm = 155.1093, GNorm = 0.0829, lr_0 = 1.1468e-04
Loss = 1.1541e-03, PNorm = 155.1103, GNorm = 0.1019, lr_0 = 1.1460e-04
Loss = 4.7041e-04, PNorm = 155.1113, GNorm = 0.0736, lr_0 = 1.1452e-04
Loss = 2.1565e-03, PNorm = 155.1134, GNorm = 0.0380, lr_0 = 1.1445e-04
Loss = 1.0239e-03, PNorm = 155.1145, GNorm = 0.0621, lr_0 = 1.1437e-04
Loss = 7.4543e-04, PNorm = 155.1156, GNorm = 0.0319, lr_0 = 1.1429e-04
Loss = 5.7052e-04, PNorm = 155.1168, GNorm = 0.0614, lr_0 = 1.1421e-04
Loss = 2.3638e-03, PNorm = 155.1165, GNorm = 0.1607, lr_0 = 1.1413e-04
Loss = 1.8285e-03, PNorm = 155.1174, GNorm = 0.0770, lr_0 = 1.1405e-04
Loss = 1.2663e-03, PNorm = 155.1197, GNorm = 0.0572, lr_0 = 1.1398e-04
Loss = 5.4023e-04, PNorm = 155.1217, GNorm = 0.0537, lr_0 = 1.1390e-04
Loss = 2.0912e-03, PNorm = 155.1231, GNorm = 0.0535, lr_0 = 1.1382e-04
Loss = 1.8056e-03, PNorm = 155.1243, GNorm = 0.0482, lr_0 = 1.1374e-04
Loss = 4.5363e-04, PNorm = 155.1247, GNorm = 0.1032, lr_0 = 1.1366e-04
Loss = 7.0413e-04, PNorm = 155.1275, GNorm = 0.1362, lr_0 = 1.1359e-04
Loss = 8.3847e-04, PNorm = 155.1287, GNorm = 0.2884, lr_0 = 1.1351e-04
Loss = 1.5493e-03, PNorm = 155.1314, GNorm = 0.1089, lr_0 = 1.1343e-04
Loss = 8.3534e-04, PNorm = 155.1327, GNorm = 0.2620, lr_0 = 1.1335e-04
Loss = 4.8234e-04, PNorm = 155.1335, GNorm = 0.0252, lr_0 = 1.1328e-04
Loss = 1.0693e-03, PNorm = 155.1365, GNorm = 0.1273, lr_0 = 1.1320e-04
Loss = 1.1873e-03, PNorm = 155.1372, GNorm = 0.2153, lr_0 = 1.1312e-04
Loss = 1.7715e-03, PNorm = 155.1376, GNorm = 0.0850, lr_0 = 1.1304e-04
Loss = 4.9713e-04, PNorm = 155.1374, GNorm = 0.0307, lr_0 = 1.1297e-04
Loss = 4.3012e-04, PNorm = 155.1384, GNorm = 0.0697, lr_0 = 1.1289e-04
Loss = 9.4501e-04, PNorm = 155.1399, GNorm = 0.0320, lr_0 = 1.1281e-04
Loss = 2.2881e-03, PNorm = 155.1420, GNorm = 0.1168, lr_0 = 1.1273e-04
Loss = 6.1451e-04, PNorm = 155.1437, GNorm = 0.0649, lr_0 = 1.1266e-04
Loss = 1.5688e-03, PNorm = 155.1446, GNorm = 0.1209, lr_0 = 1.1258e-04
Loss = 8.4170e-04, PNorm = 155.1452, GNorm = 0.0811, lr_0 = 1.1250e-04
Loss = 3.2059e-03, PNorm = 155.1450, GNorm = 0.0756, lr_0 = 1.1243e-04
Loss = 1.8165e-03, PNorm = 155.1445, GNorm = 0.1365, lr_0 = 1.1235e-04
Loss = 2.0898e-03, PNorm = 155.1456, GNorm = 0.0819, lr_0 = 1.1227e-04
Loss = 2.3093e-03, PNorm = 155.1475, GNorm = 0.0699, lr_0 = 1.1219e-04
Loss = 9.7331e-04, PNorm = 155.1495, GNorm = 0.0932, lr_0 = 1.1212e-04
Loss = 1.4074e-03, PNorm = 155.1510, GNorm = 0.0279, lr_0 = 1.1204e-04
Loss = 1.1040e-03, PNorm = 155.1540, GNorm = 0.0337, lr_0 = 1.1196e-04
Loss = 6.4420e-04, PNorm = 155.1557, GNorm = 0.1093, lr_0 = 1.1189e-04
Loss = 1.6948e-03, PNorm = 155.1574, GNorm = 0.0566, lr_0 = 1.1181e-04
Loss = 1.6514e-03, PNorm = 155.1593, GNorm = 0.1590, lr_0 = 1.1173e-04
Loss = 1.3520e-03, PNorm = 155.1576, GNorm = 0.1232, lr_0 = 1.1166e-04
Loss = 3.8184e-03, PNorm = 155.1590, GNorm = 0.0575, lr_0 = 1.1158e-04
Loss = 5.3692e-04, PNorm = 155.1597, GNorm = 0.0259, lr_0 = 1.1150e-04
Loss = 9.7665e-04, PNorm = 155.1611, GNorm = 0.0809, lr_0 = 1.1143e-04
Loss = 2.4537e-03, PNorm = 155.1632, GNorm = 0.0820, lr_0 = 1.1135e-04
Loss = 6.9695e-04, PNorm = 155.1665, GNorm = 0.0767, lr_0 = 1.1128e-04
Loss = 5.6458e-04, PNorm = 155.1695, GNorm = 0.0470, lr_0 = 1.1120e-04
Loss = 6.0302e-04, PNorm = 155.1707, GNorm = 0.1762, lr_0 = 1.1112e-04
Loss = 2.1970e-03, PNorm = 155.1709, GNorm = 0.3467, lr_0 = 1.1105e-04
Loss = 3.7517e-03, PNorm = 155.1728, GNorm = 0.2338, lr_0 = 1.1097e-04
Loss = 6.5151e-04, PNorm = 155.1750, GNorm = 0.1041, lr_0 = 1.1089e-04
Loss = 4.7300e-04, PNorm = 155.1770, GNorm = 0.0390, lr_0 = 1.1082e-04
Loss = 4.4014e-04, PNorm = 155.1772, GNorm = 0.0627, lr_0 = 1.1074e-04
Loss = 7.2967e-04, PNorm = 155.1791, GNorm = 0.0415, lr_0 = 1.1067e-04
Loss = 5.5294e-04, PNorm = 155.1807, GNorm = 0.0196, lr_0 = 1.1059e-04
Loss = 1.4599e-03, PNorm = 155.1827, GNorm = 0.1334, lr_0 = 1.1052e-04
Loss = 6.9016e-04, PNorm = 155.1841, GNorm = 0.0473, lr_0 = 1.1044e-04
Loss = 3.9795e-04, PNorm = 155.1843, GNorm = 0.0581, lr_0 = 1.1036e-04
Loss = 1.6004e-03, PNorm = 155.1857, GNorm = 0.0490, lr_0 = 1.1029e-04
Loss = 7.7404e-04, PNorm = 155.1867, GNorm = 0.0688, lr_0 = 1.1021e-04
Loss = 8.7078e-04, PNorm = 155.1877, GNorm = 0.2403, lr_0 = 1.1014e-04
Loss = 6.6167e-04, PNorm = 155.1888, GNorm = 0.0734, lr_0 = 1.1006e-04
Loss = 1.7056e-03, PNorm = 155.1909, GNorm = 0.0645, lr_0 = 1.0999e-04
Loss = 6.6882e-04, PNorm = 155.1917, GNorm = 0.0814, lr_0 = 1.0991e-04
Loss = 4.7507e-04, PNorm = 155.1933, GNorm = 0.1071, lr_0 = 1.0984e-04
Loss = 1.1342e-03, PNorm = 155.1953, GNorm = 0.0392, lr_0 = 1.0976e-04
Loss = 8.8497e-04, PNorm = 155.1964, GNorm = 0.0541, lr_0 = 1.0969e-04
Loss = 1.5823e-03, PNorm = 155.1967, GNorm = 0.0700, lr_0 = 1.0961e-04
Loss = 5.4146e-04, PNorm = 155.1974, GNorm = 0.1102, lr_0 = 1.0954e-04
Loss = 6.2827e-04, PNorm = 155.1988, GNorm = 0.1681, lr_0 = 1.0946e-04
Loss = 8.8271e-04, PNorm = 155.1988, GNorm = 0.1005, lr_0 = 1.0939e-04
Loss = 7.7248e-04, PNorm = 155.1999, GNorm = 0.0805, lr_0 = 1.0931e-04
Loss = 1.2571e-03, PNorm = 155.2024, GNorm = 0.0407, lr_0 = 1.0924e-04
Loss = 1.5771e-03, PNorm = 155.2060, GNorm = 0.1202, lr_0 = 1.0916e-04
Loss = 5.7406e-04, PNorm = 155.2067, GNorm = 0.1605, lr_0 = 1.0909e-04
Loss = 1.4277e-03, PNorm = 155.2087, GNorm = 0.0572, lr_0 = 1.0901e-04
Loss = 1.1285e-03, PNorm = 155.2094, GNorm = 0.1095, lr_0 = 1.0894e-04
Loss = 2.5815e-03, PNorm = 155.2095, GNorm = 0.2264, lr_0 = 1.0886e-04
Loss = 6.9066e-04, PNorm = 155.2105, GNorm = 0.0549, lr_0 = 1.0879e-04
Loss = 6.1671e-04, PNorm = 155.2123, GNorm = 0.0376, lr_0 = 1.0871e-04
Loss = 6.3455e-04, PNorm = 155.2131, GNorm = 0.0352, lr_0 = 1.0864e-04
Loss = 1.0510e-03, PNorm = 155.2159, GNorm = 0.0834, lr_0 = 1.0856e-04
Validation mae = 0.474694
Epoch 29
Loss = 4.8218e-04, PNorm = 155.2175, GNorm = 0.0251, lr_0 = 1.0849e-04
Loss = 4.0065e-04, PNorm = 155.2178, GNorm = 0.0320, lr_0 = 1.0841e-04
Loss = 6.3361e-04, PNorm = 155.2190, GNorm = 0.0308, lr_0 = 1.0834e-04
Loss = 8.7275e-04, PNorm = 155.2194, GNorm = 0.2681, lr_0 = 1.0827e-04
Loss = 7.2458e-04, PNorm = 155.2202, GNorm = 0.0741, lr_0 = 1.0819e-04
Loss = 1.4472e-03, PNorm = 155.2199, GNorm = 0.1257, lr_0 = 1.0812e-04
Loss = 5.2025e-04, PNorm = 155.2199, GNorm = 0.1115, lr_0 = 1.0804e-04
Loss = 5.5743e-04, PNorm = 155.2202, GNorm = 0.0312, lr_0 = 1.0797e-04
Loss = 5.8495e-04, PNorm = 155.2207, GNorm = 0.1000, lr_0 = 1.0790e-04
Loss = 3.0234e-03, PNorm = 155.2215, GNorm = 0.0262, lr_0 = 1.0782e-04
Loss = 1.1327e-03, PNorm = 155.2212, GNorm = 0.0898, lr_0 = 1.0775e-04
Loss = 3.3812e-03, PNorm = 155.2223, GNorm = 0.0682, lr_0 = 1.0767e-04
Loss = 6.8945e-04, PNorm = 155.2238, GNorm = 0.1150, lr_0 = 1.0760e-04
Loss = 1.4641e-03, PNorm = 155.2265, GNorm = 0.1204, lr_0 = 1.0753e-04
Loss = 4.2644e-04, PNorm = 155.2283, GNorm = 0.0242, lr_0 = 1.0745e-04
Loss = 1.0767e-03, PNorm = 155.2291, GNorm = 0.0671, lr_0 = 1.0738e-04
Loss = 5.9184e-04, PNorm = 155.2283, GNorm = 0.0219, lr_0 = 1.0731e-04
Loss = 2.5510e-03, PNorm = 155.2308, GNorm = 0.1036, lr_0 = 1.0723e-04
Loss = 1.0111e-03, PNorm = 155.2322, GNorm = 0.1125, lr_0 = 1.0716e-04
Loss = 1.1050e-03, PNorm = 155.2344, GNorm = 0.1219, lr_0 = 1.0709e-04
Loss = 8.1815e-04, PNorm = 155.2357, GNorm = 0.1505, lr_0 = 1.0701e-04
Loss = 3.3899e-03, PNorm = 155.2374, GNorm = 0.1091, lr_0 = 1.0694e-04
Loss = 3.9538e-04, PNorm = 155.2387, GNorm = 0.0444, lr_0 = 1.0687e-04
Loss = 5.2881e-04, PNorm = 155.2404, GNorm = 0.0771, lr_0 = 1.0679e-04
Loss = 1.6511e-03, PNorm = 155.2418, GNorm = 0.0241, lr_0 = 1.0672e-04
Loss = 4.1177e-04, PNorm = 155.2429, GNorm = 0.0523, lr_0 = 1.0665e-04
Loss = 1.6978e-03, PNorm = 155.2433, GNorm = 0.1729, lr_0 = 1.0657e-04
Loss = 1.1142e-03, PNorm = 155.2429, GNorm = 0.3737, lr_0 = 1.0650e-04
Loss = 7.3802e-04, PNorm = 155.2425, GNorm = 0.0250, lr_0 = 1.0643e-04
Loss = 5.2430e-04, PNorm = 155.2451, GNorm = 0.1035, lr_0 = 1.0635e-04
Loss = 5.6880e-04, PNorm = 155.2457, GNorm = 0.1031, lr_0 = 1.0628e-04
Loss = 7.4359e-04, PNorm = 155.2453, GNorm = 0.0635, lr_0 = 1.0621e-04
Loss = 7.9955e-04, PNorm = 155.2463, GNorm = 0.0447, lr_0 = 1.0614e-04
Loss = 3.4433e-03, PNorm = 155.2456, GNorm = 0.2334, lr_0 = 1.0606e-04
Loss = 4.2086e-04, PNorm = 155.2461, GNorm = 0.0775, lr_0 = 1.0599e-04
Loss = 1.7486e-03, PNorm = 155.2468, GNorm = 0.1047, lr_0 = 1.0592e-04
Loss = 7.5074e-04, PNorm = 155.2469, GNorm = 0.0382, lr_0 = 1.0585e-04
Loss = 6.3648e-04, PNorm = 155.2488, GNorm = 0.0299, lr_0 = 1.0577e-04
Loss = 5.4558e-04, PNorm = 155.2507, GNorm = 0.0658, lr_0 = 1.0570e-04
Loss = 5.1015e-04, PNorm = 155.2532, GNorm = 0.0332, lr_0 = 1.0563e-04
Loss = 4.7311e-04, PNorm = 155.2535, GNorm = 0.1478, lr_0 = 1.0556e-04
Loss = 1.0190e-03, PNorm = 155.2537, GNorm = 0.0777, lr_0 = 1.0548e-04
Loss = 3.8070e-04, PNorm = 155.2545, GNorm = 0.0883, lr_0 = 1.0541e-04
Loss = 2.0245e-03, PNorm = 155.2549, GNorm = 0.0871, lr_0 = 1.0534e-04
Loss = 4.1462e-04, PNorm = 155.2568, GNorm = 0.0496, lr_0 = 1.0527e-04
Loss = 1.0412e-03, PNorm = 155.2577, GNorm = 0.0975, lr_0 = 1.0519e-04
Loss = 7.8326e-04, PNorm = 155.2598, GNorm = 0.1305, lr_0 = 1.0512e-04
Loss = 2.7602e-03, PNorm = 155.2622, GNorm = 0.0872, lr_0 = 1.0505e-04
Loss = 1.0934e-03, PNorm = 155.2635, GNorm = 0.0311, lr_0 = 1.0498e-04
Loss = 1.1644e-03, PNorm = 155.2637, GNorm = 0.1465, lr_0 = 1.0491e-04
Loss = 1.0284e-03, PNorm = 155.2643, GNorm = 0.0666, lr_0 = 1.0483e-04
Loss = 1.5710e-03, PNorm = 155.2659, GNorm = 0.1321, lr_0 = 1.0476e-04
Loss = 7.9585e-04, PNorm = 155.2679, GNorm = 0.1329, lr_0 = 1.0469e-04
Loss = 5.4650e-04, PNorm = 155.2694, GNorm = 0.1324, lr_0 = 1.0462e-04
Loss = 1.0784e-03, PNorm = 155.2706, GNorm = 0.0430, lr_0 = 1.0455e-04
Loss = 9.8723e-04, PNorm = 155.2711, GNorm = 0.1171, lr_0 = 1.0448e-04
Loss = 7.9057e-04, PNorm = 155.2722, GNorm = 0.0739, lr_0 = 1.0440e-04
Loss = 4.3310e-04, PNorm = 155.2718, GNorm = 0.0507, lr_0 = 1.0433e-04
Loss = 8.0019e-04, PNorm = 155.2723, GNorm = 0.0573, lr_0 = 1.0426e-04
Loss = 3.8050e-04, PNorm = 155.2729, GNorm = 0.0877, lr_0 = 1.0419e-04
Loss = 1.0542e-03, PNorm = 155.2725, GNorm = 0.0670, lr_0 = 1.0412e-04
Loss = 1.7802e-03, PNorm = 155.2726, GNorm = 0.0781, lr_0 = 1.0405e-04
Loss = 1.0784e-03, PNorm = 155.2738, GNorm = 0.0644, lr_0 = 1.0398e-04
Loss = 6.8427e-04, PNorm = 155.2747, GNorm = 0.0970, lr_0 = 1.0391e-04
Loss = 7.0304e-04, PNorm = 155.2759, GNorm = 0.1116, lr_0 = 1.0383e-04
Loss = 1.7698e-03, PNorm = 155.2775, GNorm = 0.0793, lr_0 = 1.0376e-04
Loss = 8.3405e-04, PNorm = 155.2798, GNorm = 0.2002, lr_0 = 1.0369e-04
Loss = 1.0700e-03, PNorm = 155.2826, GNorm = 0.1031, lr_0 = 1.0362e-04
Loss = 4.2822e-04, PNorm = 155.2855, GNorm = 0.0276, lr_0 = 1.0355e-04
Loss = 4.1647e-04, PNorm = 155.2872, GNorm = 0.0575, lr_0 = 1.0348e-04
Loss = 2.0788e-03, PNorm = 155.2890, GNorm = 0.0610, lr_0 = 1.0341e-04
Loss = 7.7990e-04, PNorm = 155.2898, GNorm = 0.0499, lr_0 = 1.0334e-04
Loss = 1.3177e-03, PNorm = 155.2907, GNorm = 0.0712, lr_0 = 1.0327e-04
Loss = 1.7541e-03, PNorm = 155.2929, GNorm = 0.1061, lr_0 = 1.0320e-04
Loss = 1.0683e-03, PNorm = 155.2931, GNorm = 0.0450, lr_0 = 1.0312e-04
Loss = 7.9940e-04, PNorm = 155.2951, GNorm = 0.0568, lr_0 = 1.0305e-04
Loss = 8.1274e-04, PNorm = 155.2941, GNorm = 0.0459, lr_0 = 1.0298e-04
Loss = 6.3175e-04, PNorm = 155.2954, GNorm = 0.1768, lr_0 = 1.0291e-04
Loss = 9.9477e-04, PNorm = 155.2980, GNorm = 0.0769, lr_0 = 1.0284e-04
Loss = 4.4594e-04, PNorm = 155.2998, GNorm = 0.0924, lr_0 = 1.0277e-04
Loss = 1.0159e-03, PNorm = 155.3010, GNorm = 0.1138, lr_0 = 1.0270e-04
Loss = 1.9014e-03, PNorm = 155.3008, GNorm = 0.0885, lr_0 = 1.0263e-04
Loss = 1.2882e-03, PNorm = 155.3025, GNorm = 0.1184, lr_0 = 1.0256e-04
Loss = 1.0730e-03, PNorm = 155.3030, GNorm = 0.0212, lr_0 = 1.0249e-04
Loss = 4.5318e-04, PNorm = 155.3035, GNorm = 0.0767, lr_0 = 1.0242e-04
Loss = 1.1468e-03, PNorm = 155.3048, GNorm = 0.0177, lr_0 = 1.0235e-04
Loss = 1.0911e-03, PNorm = 155.3049, GNorm = 0.0502, lr_0 = 1.0228e-04
Loss = 6.2163e-04, PNorm = 155.3060, GNorm = 0.0410, lr_0 = 1.0221e-04
Loss = 2.1177e-03, PNorm = 155.3077, GNorm = 0.0694, lr_0 = 1.0214e-04
Loss = 4.2256e-04, PNorm = 155.3084, GNorm = 0.0551, lr_0 = 1.0207e-04
Loss = 7.4941e-04, PNorm = 155.3085, GNorm = 0.0972, lr_0 = 1.0200e-04
Loss = 4.3404e-04, PNorm = 155.3090, GNorm = 0.0642, lr_0 = 1.0193e-04
Loss = 3.8479e-04, PNorm = 155.3108, GNorm = 0.0292, lr_0 = 1.0186e-04
Loss = 1.0816e-03, PNorm = 155.3111, GNorm = 0.0745, lr_0 = 1.0179e-04
Loss = 7.7114e-04, PNorm = 155.3110, GNorm = 0.1123, lr_0 = 1.0172e-04
Loss = 5.2779e-04, PNorm = 155.3131, GNorm = 0.0326, lr_0 = 1.0165e-04
Loss = 4.6228e-04, PNorm = 155.3159, GNorm = 0.0553, lr_0 = 1.0158e-04
Loss = 1.4888e-03, PNorm = 155.3168, GNorm = 0.1224, lr_0 = 1.0151e-04
Loss = 7.9102e-04, PNorm = 155.3169, GNorm = 0.1074, lr_0 = 1.0144e-04
Loss = 8.6495e-04, PNorm = 155.3182, GNorm = 0.0893, lr_0 = 1.0137e-04
Loss = 5.6573e-04, PNorm = 155.3197, GNorm = 0.0348, lr_0 = 1.0130e-04
Loss = 1.9468e-03, PNorm = 155.3207, GNorm = 0.0502, lr_0 = 1.0123e-04
Loss = 6.7524e-04, PNorm = 155.3216, GNorm = 0.1692, lr_0 = 1.0116e-04
Loss = 4.2503e-04, PNorm = 155.3222, GNorm = 0.0202, lr_0 = 1.0110e-04
Loss = 1.6962e-03, PNorm = 155.3236, GNorm = 0.0816, lr_0 = 1.0103e-04
Loss = 7.7803e-04, PNorm = 155.3241, GNorm = 0.0739, lr_0 = 1.0096e-04
Loss = 5.8000e-04, PNorm = 155.3245, GNorm = 0.0670, lr_0 = 1.0089e-04
Loss = 2.3669e-03, PNorm = 155.3239, GNorm = 0.2880, lr_0 = 1.0082e-04
Loss = 7.0718e-04, PNorm = 155.3273, GNorm = 0.1054, lr_0 = 1.0075e-04
Loss = 1.4376e-03, PNorm = 155.3291, GNorm = 0.2596, lr_0 = 1.0068e-04
Loss = 1.3531e-03, PNorm = 155.3324, GNorm = 0.1656, lr_0 = 1.0061e-04
Loss = 1.4493e-03, PNorm = 155.3338, GNorm = 0.1155, lr_0 = 1.0054e-04
Loss = 1.3783e-03, PNorm = 155.3339, GNorm = 0.0254, lr_0 = 1.0047e-04
Loss = 3.2410e-03, PNorm = 155.3330, GNorm = 0.0680, lr_0 = 1.0041e-04
Loss = 2.2106e-03, PNorm = 155.3327, GNorm = 0.0565, lr_0 = 1.0034e-04
Loss = 1.3472e-03, PNorm = 155.3356, GNorm = 0.0644, lr_0 = 1.0027e-04
Loss = 7.6684e-04, PNorm = 155.3379, GNorm = 0.0263, lr_0 = 1.0020e-04
Loss = 2.6701e-03, PNorm = 155.3398, GNorm = 0.1041, lr_0 = 1.0013e-04
Loss = 1.0989e-03, PNorm = 155.3407, GNorm = 0.2336, lr_0 = 1.0006e-04
Loss = 1.7229e-03, PNorm = 155.3421, GNorm = 0.1640, lr_0 = 1.0000e-04
Validation mae = 0.475471
Model 0 best validation mae = 0.474694 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.453829
Ensemble test mae = 0.453829
Fold 5
Splitting data with seed 5
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 8.9051e-01, PNorm = 62.2448, GNorm = 2.9050, lr_0 = 1.0413e-04
Loss = 7.3033e-01, PNorm = 62.2572, GNorm = 1.9691, lr_0 = 1.0788e-04
Loss = 5.8295e-01, PNorm = 62.2703, GNorm = 2.5298, lr_0 = 1.1163e-04
Loss = 5.2917e-01, PNorm = 62.2814, GNorm = 2.1797, lr_0 = 1.1537e-04
Loss = 4.0019e-01, PNorm = 62.2896, GNorm = 2.9637, lr_0 = 1.1913e-04
Loss = 4.7284e-01, PNorm = 62.2984, GNorm = 3.7035, lr_0 = 1.2287e-04
Loss = 4.4530e-01, PNorm = 62.3075, GNorm = 1.9330, lr_0 = 1.2663e-04
Loss = 3.4393e-01, PNorm = 62.3167, GNorm = 2.0706, lr_0 = 1.3038e-04
Loss = 3.8895e-01, PNorm = 62.3256, GNorm = 1.9450, lr_0 = 1.3413e-04
Loss = 3.6596e-01, PNorm = 62.3373, GNorm = 2.3365, lr_0 = 1.3788e-04
Loss = 3.5356e-01, PNorm = 62.3479, GNorm = 2.3542, lr_0 = 1.4163e-04
Loss = 3.4364e-01, PNorm = 62.3573, GNorm = 2.3655, lr_0 = 1.4537e-04
Loss = 3.4904e-01, PNorm = 62.3655, GNorm = 1.4542, lr_0 = 1.4913e-04
Loss = 3.9303e-01, PNorm = 62.3762, GNorm = 1.6095, lr_0 = 1.5288e-04
Loss = 3.1896e-01, PNorm = 62.3886, GNorm = 3.3304, lr_0 = 1.5662e-04
Loss = 2.8271e-01, PNorm = 62.3988, GNorm = 2.2534, lr_0 = 1.6038e-04
Loss = 3.5705e-01, PNorm = 62.4066, GNorm = 1.7123, lr_0 = 1.6412e-04
Loss = 2.8341e-01, PNorm = 62.4170, GNorm = 2.1608, lr_0 = 1.6788e-04
Loss = 2.7660e-01, PNorm = 62.4287, GNorm = 1.8188, lr_0 = 1.7163e-04
Loss = 3.1276e-01, PNorm = 62.4381, GNorm = 2.9304, lr_0 = 1.7538e-04
Loss = 2.8683e-01, PNorm = 62.4484, GNorm = 1.6715, lr_0 = 1.7913e-04
Loss = 3.0808e-01, PNorm = 62.4608, GNorm = 2.0325, lr_0 = 1.8288e-04
Loss = 3.0969e-01, PNorm = 62.4717, GNorm = 1.6914, lr_0 = 1.8662e-04
Loss = 3.2000e-01, PNorm = 62.4831, GNorm = 2.2623, lr_0 = 1.9038e-04
Loss = 3.6944e-01, PNorm = 62.4949, GNorm = 3.4023, lr_0 = 1.9413e-04
Loss = 3.0200e-01, PNorm = 62.5086, GNorm = 1.3252, lr_0 = 1.9788e-04
Loss = 3.6640e-01, PNorm = 62.5246, GNorm = 2.5784, lr_0 = 2.0163e-04
Loss = 3.2029e-01, PNorm = 62.5439, GNorm = 2.8228, lr_0 = 2.0537e-04
Loss = 2.8594e-01, PNorm = 62.5577, GNorm = 2.0864, lr_0 = 2.0913e-04
Loss = 2.5723e-01, PNorm = 62.5693, GNorm = 1.8308, lr_0 = 2.1288e-04
Loss = 2.6632e-01, PNorm = 62.5864, GNorm = 1.2952, lr_0 = 2.1663e-04
Loss = 2.5896e-01, PNorm = 62.6033, GNorm = 1.7286, lr_0 = 2.2038e-04
Loss = 2.6982e-01, PNorm = 62.6156, GNorm = 1.4627, lr_0 = 2.2412e-04
Loss = 2.6301e-01, PNorm = 62.6317, GNorm = 1.0329, lr_0 = 2.2787e-04
Loss = 3.0276e-01, PNorm = 62.6499, GNorm = 2.3544, lr_0 = 2.3163e-04
Loss = 2.5137e-01, PNorm = 62.6680, GNorm = 1.9373, lr_0 = 2.3538e-04
Loss = 2.8626e-01, PNorm = 62.6836, GNorm = 1.8675, lr_0 = 2.3913e-04
Loss = 2.3433e-01, PNorm = 62.7003, GNorm = 1.4223, lr_0 = 2.4288e-04
Loss = 2.5923e-01, PNorm = 62.7184, GNorm = 1.2587, lr_0 = 2.4662e-04
Loss = 2.8196e-01, PNorm = 62.7361, GNorm = 2.2036, lr_0 = 2.5038e-04
Loss = 2.3814e-01, PNorm = 62.7551, GNorm = 1.4452, lr_0 = 2.5413e-04
Loss = 2.8716e-01, PNorm = 62.7754, GNorm = 1.3082, lr_0 = 2.5788e-04
Loss = 2.9685e-01, PNorm = 62.7922, GNorm = 1.6323, lr_0 = 2.6163e-04
Loss = 2.6073e-01, PNorm = 62.8146, GNorm = 1.3191, lr_0 = 2.6537e-04
Loss = 2.8192e-01, PNorm = 62.8335, GNorm = 1.4221, lr_0 = 2.6912e-04
Loss = 2.8550e-01, PNorm = 62.8557, GNorm = 1.0690, lr_0 = 2.7288e-04
Loss = 2.9921e-01, PNorm = 62.8780, GNorm = 1.4502, lr_0 = 2.7663e-04
Loss = 2.5757e-01, PNorm = 62.9028, GNorm = 1.5574, lr_0 = 2.8038e-04
Loss = 3.0243e-01, PNorm = 62.9251, GNorm = 1.5506, lr_0 = 2.8413e-04
Loss = 2.3443e-01, PNorm = 62.9522, GNorm = 1.6144, lr_0 = 2.8787e-04
Loss = 2.3045e-01, PNorm = 62.9714, GNorm = 1.4139, lr_0 = 2.9163e-04
Loss = 2.3403e-01, PNorm = 62.9940, GNorm = 1.4361, lr_0 = 2.9538e-04
Loss = 2.9671e-01, PNorm = 63.0180, GNorm = 1.2877, lr_0 = 2.9913e-04
Loss = 2.8134e-01, PNorm = 63.0442, GNorm = 1.2696, lr_0 = 3.0288e-04
Loss = 2.4929e-01, PNorm = 63.0709, GNorm = 1.9613, lr_0 = 3.0662e-04
Loss = 2.4252e-01, PNorm = 63.0932, GNorm = 1.1121, lr_0 = 3.1037e-04
Loss = 2.4881e-01, PNorm = 63.1181, GNorm = 0.9311, lr_0 = 3.1413e-04
Loss = 2.8309e-01, PNorm = 63.1457, GNorm = 1.2963, lr_0 = 3.1788e-04
Loss = 2.4457e-01, PNorm = 63.1703, GNorm = 1.1777, lr_0 = 3.2163e-04
Loss = 2.8849e-01, PNorm = 63.2001, GNorm = 1.3497, lr_0 = 3.2538e-04
Loss = 2.1386e-01, PNorm = 63.2317, GNorm = 1.4420, lr_0 = 3.2912e-04
Loss = 2.7035e-01, PNorm = 63.2520, GNorm = 1.3802, lr_0 = 3.3288e-04
Loss = 2.6557e-01, PNorm = 63.2831, GNorm = 1.3007, lr_0 = 3.3663e-04
Loss = 2.7698e-01, PNorm = 63.3110, GNorm = 1.2883, lr_0 = 3.4038e-04
Loss = 2.5340e-01, PNorm = 63.3428, GNorm = 1.0456, lr_0 = 3.4413e-04
Loss = 2.3435e-01, PNorm = 63.3726, GNorm = 1.2400, lr_0 = 3.4787e-04
Loss = 2.5112e-01, PNorm = 63.3969, GNorm = 1.1038, lr_0 = 3.5162e-04
Loss = 2.4977e-01, PNorm = 63.4271, GNorm = 0.8924, lr_0 = 3.5538e-04
Loss = 2.5922e-01, PNorm = 63.4556, GNorm = 1.3241, lr_0 = 3.5913e-04
Loss = 2.5973e-01, PNorm = 63.4894, GNorm = 1.3070, lr_0 = 3.6288e-04
Loss = 2.5114e-01, PNorm = 63.5225, GNorm = 1.0461, lr_0 = 3.6662e-04
Loss = 2.6113e-01, PNorm = 63.5496, GNorm = 1.1228, lr_0 = 3.7037e-04
Loss = 2.3171e-01, PNorm = 63.5885, GNorm = 1.1748, lr_0 = 3.7413e-04
Loss = 2.2916e-01, PNorm = 63.6175, GNorm = 1.0980, lr_0 = 3.7788e-04
Loss = 2.1214e-01, PNorm = 63.6529, GNorm = 1.2494, lr_0 = 3.8163e-04
Loss = 2.3488e-01, PNorm = 63.6839, GNorm = 0.9713, lr_0 = 3.8537e-04
Loss = 2.2374e-01, PNorm = 63.7179, GNorm = 0.7869, lr_0 = 3.8912e-04
Loss = 2.0399e-01, PNorm = 63.7507, GNorm = 1.0037, lr_0 = 3.9287e-04
Loss = 2.3597e-01, PNorm = 63.7823, GNorm = 1.0576, lr_0 = 3.9663e-04
Loss = 2.4316e-01, PNorm = 63.8178, GNorm = 1.4590, lr_0 = 4.0038e-04
Loss = 2.4884e-01, PNorm = 63.8563, GNorm = 1.8346, lr_0 = 4.0413e-04
Loss = 2.1916e-01, PNorm = 63.8991, GNorm = 1.2080, lr_0 = 4.0787e-04
Loss = 2.6302e-01, PNorm = 63.9318, GNorm = 1.7752, lr_0 = 4.1162e-04
Loss = 1.9099e-01, PNorm = 63.9717, GNorm = 1.3009, lr_0 = 4.1537e-04
Loss = 2.2743e-01, PNorm = 64.0069, GNorm = 1.1172, lr_0 = 4.1913e-04
Loss = 2.3027e-01, PNorm = 64.0465, GNorm = 0.7631, lr_0 = 4.2288e-04
Loss = 2.5349e-01, PNorm = 64.0895, GNorm = 1.0838, lr_0 = 4.2662e-04
Loss = 2.2971e-01, PNorm = 64.1245, GNorm = 0.9075, lr_0 = 4.3037e-04
Loss = 2.1333e-01, PNorm = 64.1638, GNorm = 1.0693, lr_0 = 4.3412e-04
Loss = 2.5938e-01, PNorm = 64.2032, GNorm = 1.2284, lr_0 = 4.3788e-04
Loss = 2.3160e-01, PNorm = 64.2460, GNorm = 1.5028, lr_0 = 4.4163e-04
Loss = 2.4427e-01, PNorm = 64.2852, GNorm = 1.1749, lr_0 = 4.4538e-04
Loss = 2.0987e-01, PNorm = 64.3246, GNorm = 1.0252, lr_0 = 4.4912e-04
Loss = 2.4881e-01, PNorm = 64.3625, GNorm = 1.0817, lr_0 = 4.5287e-04
Loss = 2.4041e-01, PNorm = 64.4018, GNorm = 0.9865, lr_0 = 4.5662e-04
Loss = 2.2905e-01, PNorm = 64.4485, GNorm = 0.8975, lr_0 = 4.6038e-04
Loss = 2.3139e-01, PNorm = 64.4917, GNorm = 0.9773, lr_0 = 4.6413e-04
Loss = 2.3317e-01, PNorm = 64.5397, GNorm = 0.7959, lr_0 = 4.6787e-04
Loss = 2.2204e-01, PNorm = 64.5841, GNorm = 1.7259, lr_0 = 4.7162e-04
Loss = 2.1040e-01, PNorm = 64.6285, GNorm = 1.0959, lr_0 = 4.7537e-04
Loss = 2.3906e-01, PNorm = 64.6752, GNorm = 1.0397, lr_0 = 4.7913e-04
Loss = 2.2825e-01, PNorm = 64.7211, GNorm = 1.1042, lr_0 = 4.8288e-04
Loss = 2.1241e-01, PNorm = 64.7730, GNorm = 1.0115, lr_0 = 4.8663e-04
Loss = 2.1843e-01, PNorm = 64.8196, GNorm = 0.7385, lr_0 = 4.9038e-04
Loss = 1.9048e-01, PNorm = 64.8713, GNorm = 1.2684, lr_0 = 4.9412e-04
Loss = 2.5506e-01, PNorm = 64.9196, GNorm = 1.9615, lr_0 = 4.9788e-04
Loss = 2.4699e-01, PNorm = 64.9716, GNorm = 1.2457, lr_0 = 5.0163e-04
Loss = 2.1096e-01, PNorm = 65.0294, GNorm = 1.0655, lr_0 = 5.0538e-04
Loss = 2.0998e-01, PNorm = 65.0838, GNorm = 1.0936, lr_0 = 5.0913e-04
Loss = 2.3230e-01, PNorm = 65.1309, GNorm = 1.4570, lr_0 = 5.1287e-04
Loss = 2.4422e-01, PNorm = 65.1822, GNorm = 1.0684, lr_0 = 5.1663e-04
Loss = 2.2699e-01, PNorm = 65.2387, GNorm = 0.8443, lr_0 = 5.2038e-04
Loss = 2.1870e-01, PNorm = 65.2971, GNorm = 1.0947, lr_0 = 5.2413e-04
Loss = 2.5369e-01, PNorm = 65.3476, GNorm = 0.9057, lr_0 = 5.2788e-04
Loss = 2.3491e-01, PNorm = 65.4105, GNorm = 0.8603, lr_0 = 5.3162e-04
Loss = 2.0959e-01, PNorm = 65.4708, GNorm = 1.2163, lr_0 = 5.3538e-04
Loss = 2.2014e-01, PNorm = 65.5279, GNorm = 0.9040, lr_0 = 5.3912e-04
Loss = 1.7340e-01, PNorm = 65.5873, GNorm = 0.8742, lr_0 = 5.4288e-04
Loss = 2.3480e-01, PNorm = 65.6393, GNorm = 0.9573, lr_0 = 5.4663e-04
Loss = 2.5033e-01, PNorm = 65.6892, GNorm = 1.3970, lr_0 = 5.5038e-04
Validation mae = 0.555247
Epoch 1
Loss = 1.3549e-01, PNorm = 65.7472, GNorm = 0.8837, lr_0 = 5.5413e-04
Loss = 1.5451e-01, PNorm = 65.8040, GNorm = 0.9306, lr_0 = 5.5787e-04
Loss = 1.5508e-01, PNorm = 65.8503, GNorm = 0.5738, lr_0 = 5.6163e-04
Loss = 1.5334e-01, PNorm = 65.8985, GNorm = 0.9636, lr_0 = 5.6538e-04
Loss = 1.4007e-01, PNorm = 65.9415, GNorm = 0.7110, lr_0 = 5.6913e-04
Loss = 1.4991e-01, PNorm = 65.9939, GNorm = 0.9542, lr_0 = 5.7288e-04
Loss = 1.5654e-01, PNorm = 66.0493, GNorm = 1.0195, lr_0 = 5.7662e-04
Loss = 1.5770e-01, PNorm = 66.1039, GNorm = 0.7389, lr_0 = 5.8038e-04
Loss = 1.5836e-01, PNorm = 66.1652, GNorm = 0.8341, lr_0 = 5.8413e-04
Loss = 1.3910e-01, PNorm = 66.2181, GNorm = 1.0193, lr_0 = 5.8788e-04
Loss = 1.3700e-01, PNorm = 66.2799, GNorm = 0.7213, lr_0 = 5.9163e-04
Loss = 1.6007e-01, PNorm = 66.3361, GNorm = 0.8421, lr_0 = 5.9538e-04
Loss = 1.3669e-01, PNorm = 66.4002, GNorm = 0.8987, lr_0 = 5.9913e-04
Loss = 1.2815e-01, PNorm = 66.4571, GNorm = 0.8114, lr_0 = 6.0288e-04
Loss = 1.4847e-01, PNorm = 66.5193, GNorm = 1.1231, lr_0 = 6.0663e-04
Loss = 1.3986e-01, PNorm = 66.5798, GNorm = 0.8231, lr_0 = 6.1038e-04
Loss = 1.5906e-01, PNorm = 66.6444, GNorm = 0.7899, lr_0 = 6.1413e-04
Loss = 1.6287e-01, PNorm = 66.7083, GNorm = 1.2150, lr_0 = 6.1788e-04
Loss = 1.4188e-01, PNorm = 66.7692, GNorm = 0.8156, lr_0 = 6.2163e-04
Loss = 1.4188e-01, PNorm = 66.8357, GNorm = 0.9936, lr_0 = 6.2538e-04
Loss = 1.5395e-01, PNorm = 66.8997, GNorm = 0.7768, lr_0 = 6.2913e-04
Loss = 1.5429e-01, PNorm = 66.9692, GNorm = 0.7077, lr_0 = 6.3288e-04
Loss = 1.7606e-01, PNorm = 67.0396, GNorm = 0.9588, lr_0 = 6.3663e-04
Loss = 1.4180e-01, PNorm = 67.1158, GNorm = 0.7306, lr_0 = 6.4038e-04
Loss = 1.4306e-01, PNorm = 67.1845, GNorm = 1.0418, lr_0 = 6.4413e-04
Loss = 1.6791e-01, PNorm = 67.2567, GNorm = 0.8233, lr_0 = 6.4788e-04
Loss = 1.4149e-01, PNorm = 67.3346, GNorm = 0.8259, lr_0 = 6.5163e-04
Loss = 1.4830e-01, PNorm = 67.4077, GNorm = 0.7878, lr_0 = 6.5538e-04
Loss = 1.7219e-01, PNorm = 67.4854, GNorm = 0.7319, lr_0 = 6.5913e-04
Loss = 1.6365e-01, PNorm = 67.5679, GNorm = 0.7695, lr_0 = 6.6288e-04
Loss = 1.7077e-01, PNorm = 67.6492, GNorm = 1.0783, lr_0 = 6.6663e-04
Loss = 1.6094e-01, PNorm = 67.7273, GNorm = 0.7119, lr_0 = 6.7038e-04
Loss = 1.9321e-01, PNorm = 67.8206, GNorm = 0.9932, lr_0 = 6.7413e-04
Loss = 1.8114e-01, PNorm = 67.9027, GNorm = 0.8416, lr_0 = 6.7788e-04
Loss = 1.4760e-01, PNorm = 67.9870, GNorm = 0.9693, lr_0 = 6.8163e-04
Loss = 1.5947e-01, PNorm = 68.0616, GNorm = 0.9639, lr_0 = 6.8538e-04
Loss = 1.6442e-01, PNorm = 68.1429, GNorm = 0.7808, lr_0 = 6.8913e-04
Loss = 1.4983e-01, PNorm = 68.2291, GNorm = 0.9525, lr_0 = 6.9288e-04
Loss = 1.5974e-01, PNorm = 68.3155, GNorm = 0.9000, lr_0 = 6.9663e-04
Loss = 1.7238e-01, PNorm = 68.4111, GNorm = 1.3806, lr_0 = 7.0038e-04
Loss = 1.6034e-01, PNorm = 68.5068, GNorm = 0.8190, lr_0 = 7.0413e-04
Loss = 1.5157e-01, PNorm = 68.5973, GNorm = 0.7889, lr_0 = 7.0788e-04
Loss = 1.7577e-01, PNorm = 68.6919, GNorm = 0.7484, lr_0 = 7.1163e-04
Loss = 1.6854e-01, PNorm = 68.7976, GNorm = 0.7662, lr_0 = 7.1538e-04
Loss = 1.5616e-01, PNorm = 68.8933, GNorm = 0.7649, lr_0 = 7.1913e-04
Loss = 1.9447e-01, PNorm = 68.9925, GNorm = 0.9934, lr_0 = 7.2288e-04
Loss = 1.6578e-01, PNorm = 69.0902, GNorm = 0.8409, lr_0 = 7.2663e-04
Loss = 1.7804e-01, PNorm = 69.1970, GNorm = 1.4722, lr_0 = 7.3038e-04
Loss = 1.6271e-01, PNorm = 69.3014, GNorm = 0.6903, lr_0 = 7.3413e-04
Loss = 1.5478e-01, PNorm = 69.3936, GNorm = 0.7497, lr_0 = 7.3788e-04
Loss = 1.8284e-01, PNorm = 69.4900, GNorm = 0.8952, lr_0 = 7.4163e-04
Loss = 1.3996e-01, PNorm = 69.5869, GNorm = 0.7489, lr_0 = 7.4538e-04
Loss = 1.7906e-01, PNorm = 69.6808, GNorm = 0.9277, lr_0 = 7.4913e-04
Loss = 1.6038e-01, PNorm = 69.7792, GNorm = 0.7190, lr_0 = 7.5288e-04
Loss = 1.6035e-01, PNorm = 69.8756, GNorm = 0.6955, lr_0 = 7.5663e-04
Loss = 1.6689e-01, PNorm = 69.9762, GNorm = 0.9019, lr_0 = 7.6038e-04
Loss = 1.8904e-01, PNorm = 70.0867, GNorm = 0.8564, lr_0 = 7.6413e-04
Loss = 1.7956e-01, PNorm = 70.1987, GNorm = 1.1880, lr_0 = 7.6788e-04
Loss = 1.8896e-01, PNorm = 70.3075, GNorm = 0.7131, lr_0 = 7.7163e-04
Loss = 1.6291e-01, PNorm = 70.4145, GNorm = 0.9525, lr_0 = 7.7538e-04
Loss = 1.6393e-01, PNorm = 70.5271, GNorm = 0.8301, lr_0 = 7.7913e-04
Loss = 1.4578e-01, PNorm = 70.6196, GNorm = 0.6127, lr_0 = 7.8288e-04
Loss = 1.8025e-01, PNorm = 70.7105, GNorm = 0.7249, lr_0 = 7.8663e-04
Loss = 1.6118e-01, PNorm = 70.8115, GNorm = 0.8631, lr_0 = 7.9038e-04
Loss = 1.6017e-01, PNorm = 70.9111, GNorm = 0.6687, lr_0 = 7.9413e-04
Loss = 1.7362e-01, PNorm = 71.0137, GNorm = 0.8670, lr_0 = 7.9788e-04
Loss = 1.6180e-01, PNorm = 71.1277, GNorm = 0.6433, lr_0 = 8.0163e-04
Loss = 1.8066e-01, PNorm = 71.2346, GNorm = 0.8444, lr_0 = 8.0538e-04
Loss = 1.5260e-01, PNorm = 71.3504, GNorm = 0.8076, lr_0 = 8.0913e-04
Loss = 1.8352e-01, PNorm = 71.4564, GNorm = 0.8275, lr_0 = 8.1288e-04
Loss = 1.5082e-01, PNorm = 71.5760, GNorm = 0.9761, lr_0 = 8.1663e-04
Loss = 1.7901e-01, PNorm = 71.6840, GNorm = 0.9367, lr_0 = 8.2038e-04
Loss = 1.4772e-01, PNorm = 71.7921, GNorm = 0.6597, lr_0 = 8.2413e-04
Loss = 1.8659e-01, PNorm = 71.9139, GNorm = 1.2217, lr_0 = 8.2788e-04
Loss = 1.9779e-01, PNorm = 72.0346, GNorm = 0.8389, lr_0 = 8.3163e-04
Loss = 1.6976e-01, PNorm = 72.1565, GNorm = 0.7020, lr_0 = 8.3538e-04
Loss = 1.5402e-01, PNorm = 72.2659, GNorm = 0.6745, lr_0 = 8.3913e-04
Loss = 1.6898e-01, PNorm = 72.3780, GNorm = 0.6698, lr_0 = 8.4288e-04
Loss = 1.6737e-01, PNorm = 72.4906, GNorm = 0.9179, lr_0 = 8.4663e-04
Loss = 1.7255e-01, PNorm = 72.6049, GNorm = 0.8807, lr_0 = 8.5038e-04
Loss = 1.5947e-01, PNorm = 72.7178, GNorm = 0.9472, lr_0 = 8.5413e-04
Loss = 1.6284e-01, PNorm = 72.8218, GNorm = 0.6068, lr_0 = 8.5788e-04
Loss = 2.0197e-01, PNorm = 72.9343, GNorm = 1.3628, lr_0 = 8.6163e-04
Loss = 1.8520e-01, PNorm = 73.0432, GNorm = 1.2672, lr_0 = 8.6538e-04
Loss = 1.8616e-01, PNorm = 73.1723, GNorm = 0.5874, lr_0 = 8.6913e-04
Loss = 1.9159e-01, PNorm = 73.2802, GNorm = 1.4748, lr_0 = 8.7288e-04
Loss = 1.7000e-01, PNorm = 73.4119, GNorm = 0.7165, lr_0 = 8.7663e-04
Loss = 1.8733e-01, PNorm = 73.5289, GNorm = 0.4620, lr_0 = 8.8038e-04
Loss = 1.9791e-01, PNorm = 73.6563, GNorm = 0.9450, lr_0 = 8.8413e-04
Loss = 1.5260e-01, PNorm = 73.7733, GNorm = 0.7596, lr_0 = 8.8788e-04
Loss = 1.9731e-01, PNorm = 73.8904, GNorm = 0.7974, lr_0 = 8.9163e-04
Loss = 1.9971e-01, PNorm = 74.0098, GNorm = 1.0761, lr_0 = 8.9538e-04
Loss = 1.9402e-01, PNorm = 74.1336, GNorm = 0.7907, lr_0 = 8.9913e-04
Loss = 1.6128e-01, PNorm = 74.2550, GNorm = 0.9088, lr_0 = 9.0288e-04
Loss = 1.4993e-01, PNorm = 74.3637, GNorm = 0.6878, lr_0 = 9.0663e-04
Loss = 1.6397e-01, PNorm = 74.4722, GNorm = 0.6617, lr_0 = 9.1038e-04
Loss = 1.8948e-01, PNorm = 74.5777, GNorm = 0.8046, lr_0 = 9.1413e-04
Loss = 1.6338e-01, PNorm = 74.6972, GNorm = 0.6480, lr_0 = 9.1788e-04
Loss = 1.7739e-01, PNorm = 74.8143, GNorm = 0.8433, lr_0 = 9.2163e-04
Loss = 1.5704e-01, PNorm = 74.9358, GNorm = 0.7254, lr_0 = 9.2538e-04
Loss = 1.7834e-01, PNorm = 75.0487, GNorm = 0.8348, lr_0 = 9.2913e-04
Loss = 1.6471e-01, PNorm = 75.1825, GNorm = 1.2631, lr_0 = 9.3288e-04
Loss = 2.1915e-01, PNorm = 75.3021, GNorm = 1.0332, lr_0 = 9.3663e-04
Loss = 1.8921e-01, PNorm = 75.4332, GNorm = 0.6932, lr_0 = 9.4038e-04
Loss = 1.8864e-01, PNorm = 75.5635, GNorm = 0.9333, lr_0 = 9.4413e-04
Loss = 1.7287e-01, PNorm = 75.6903, GNorm = 0.6667, lr_0 = 9.4788e-04
Loss = 1.6445e-01, PNorm = 75.8174, GNorm = 0.6275, lr_0 = 9.5163e-04
Loss = 1.6485e-01, PNorm = 75.9458, GNorm = 0.9019, lr_0 = 9.5538e-04
Loss = 1.8192e-01, PNorm = 76.0900, GNorm = 0.7567, lr_0 = 9.5913e-04
Loss = 1.6179e-01, PNorm = 76.2302, GNorm = 1.0443, lr_0 = 9.6288e-04
Loss = 1.8172e-01, PNorm = 76.3747, GNorm = 1.2995, lr_0 = 9.6663e-04
Loss = 1.8447e-01, PNorm = 76.5006, GNorm = 1.0622, lr_0 = 9.7038e-04
Loss = 1.7009e-01, PNorm = 76.6503, GNorm = 1.0592, lr_0 = 9.7413e-04
Loss = 1.5562e-01, PNorm = 76.7829, GNorm = 0.6739, lr_0 = 9.7788e-04
Loss = 1.7173e-01, PNorm = 76.9211, GNorm = 0.4151, lr_0 = 9.8163e-04
Loss = 1.7543e-01, PNorm = 77.0560, GNorm = 0.7165, lr_0 = 9.8537e-04
Loss = 1.9019e-01, PNorm = 77.1921, GNorm = 1.1115, lr_0 = 9.8912e-04
Loss = 1.4843e-01, PNorm = 77.3392, GNorm = 0.5272, lr_0 = 9.9288e-04
Loss = 1.6955e-01, PNorm = 77.4611, GNorm = 1.1792, lr_0 = 9.9663e-04
Loss = 1.6279e-01, PNorm = 77.5914, GNorm = 0.7155, lr_0 = 9.9993e-04
Validation mae = 0.536592
Epoch 2
Loss = 1.3480e-01, PNorm = 77.7130, GNorm = 1.2343, lr_0 = 9.9925e-04
Loss = 1.1329e-01, PNorm = 77.8421, GNorm = 0.6221, lr_0 = 9.9856e-04
Loss = 1.0263e-01, PNorm = 77.9445, GNorm = 0.5315, lr_0 = 9.9788e-04
Loss = 1.1040e-01, PNorm = 78.0358, GNorm = 0.6617, lr_0 = 9.9719e-04
Loss = 1.1092e-01, PNorm = 78.1249, GNorm = 0.7450, lr_0 = 9.9651e-04
Loss = 1.2022e-01, PNorm = 78.2044, GNorm = 0.8611, lr_0 = 9.9583e-04
Loss = 1.1633e-01, PNorm = 78.2954, GNorm = 0.5864, lr_0 = 9.9515e-04
Loss = 1.0065e-01, PNorm = 78.3811, GNorm = 0.4553, lr_0 = 9.9446e-04
Loss = 1.0668e-01, PNorm = 78.4710, GNorm = 0.8328, lr_0 = 9.9378e-04
Loss = 9.6010e-02, PNorm = 78.5499, GNorm = 0.4626, lr_0 = 9.9310e-04
Loss = 1.0480e-01, PNorm = 78.6393, GNorm = 0.7657, lr_0 = 9.9242e-04
Loss = 1.0871e-01, PNorm = 78.7270, GNorm = 0.6559, lr_0 = 9.9174e-04
Loss = 1.0579e-01, PNorm = 78.8206, GNorm = 0.4255, lr_0 = 9.9106e-04
Loss = 1.1195e-01, PNorm = 78.9087, GNorm = 0.5089, lr_0 = 9.9038e-04
Loss = 1.0409e-01, PNorm = 79.0069, GNorm = 0.5425, lr_0 = 9.8971e-04
Loss = 1.0547e-01, PNorm = 79.1042, GNorm = 0.5964, lr_0 = 9.8903e-04
Loss = 1.0891e-01, PNorm = 79.2142, GNorm = 0.8300, lr_0 = 9.8835e-04
Loss = 1.0650e-01, PNorm = 79.3129, GNorm = 0.4682, lr_0 = 9.8767e-04
Loss = 1.0649e-01, PNorm = 79.4112, GNorm = 0.5041, lr_0 = 9.8700e-04
Loss = 9.8005e-02, PNorm = 79.5096, GNorm = 0.4862, lr_0 = 9.8632e-04
Loss = 1.0325e-01, PNorm = 79.6006, GNorm = 0.6658, lr_0 = 9.8564e-04
Loss = 1.0119e-01, PNorm = 79.7002, GNorm = 0.7909, lr_0 = 9.8497e-04
Loss = 9.6888e-02, PNorm = 79.7940, GNorm = 0.6251, lr_0 = 9.8429e-04
Loss = 1.1244e-01, PNorm = 79.8881, GNorm = 0.5255, lr_0 = 9.8362e-04
Loss = 9.8953e-02, PNorm = 79.9785, GNorm = 0.8242, lr_0 = 9.8295e-04
Loss = 9.7940e-02, PNorm = 80.0696, GNorm = 0.7236, lr_0 = 9.8227e-04
Loss = 1.1615e-01, PNorm = 80.1671, GNorm = 0.6568, lr_0 = 9.8160e-04
Loss = 9.5480e-02, PNorm = 80.2776, GNorm = 0.5185, lr_0 = 9.8093e-04
Loss = 1.1097e-01, PNorm = 80.3729, GNorm = 0.5607, lr_0 = 9.8026e-04
Loss = 1.1499e-01, PNorm = 80.4727, GNorm = 0.6225, lr_0 = 9.7958e-04
Loss = 1.2167e-01, PNorm = 80.5909, GNorm = 0.5906, lr_0 = 9.7891e-04
Loss = 1.1353e-01, PNorm = 80.7005, GNorm = 0.5551, lr_0 = 9.7824e-04
Loss = 1.0038e-01, PNorm = 80.8117, GNorm = 0.5503, lr_0 = 9.7757e-04
Loss = 1.1220e-01, PNorm = 80.9203, GNorm = 0.9070, lr_0 = 9.7690e-04
Loss = 1.1048e-01, PNorm = 81.0266, GNorm = 0.5748, lr_0 = 9.7623e-04
Loss = 1.1588e-01, PNorm = 81.1456, GNorm = 0.6389, lr_0 = 9.7556e-04
Loss = 1.0428e-01, PNorm = 81.2411, GNorm = 0.9013, lr_0 = 9.7490e-04
Loss = 1.1143e-01, PNorm = 81.3461, GNorm = 0.8245, lr_0 = 9.7423e-04
Loss = 1.1928e-01, PNorm = 81.4439, GNorm = 0.5948, lr_0 = 9.7356e-04
Loss = 1.2261e-01, PNorm = 81.5638, GNorm = 0.8423, lr_0 = 9.7289e-04
Loss = 1.0282e-01, PNorm = 81.6676, GNorm = 0.6412, lr_0 = 9.7223e-04
Loss = 1.1135e-01, PNorm = 81.7775, GNorm = 0.7560, lr_0 = 9.7156e-04
Loss = 1.0322e-01, PNorm = 81.8752, GNorm = 0.3664, lr_0 = 9.7090e-04
Loss = 1.1642e-01, PNorm = 81.9895, GNorm = 0.6445, lr_0 = 9.7023e-04
Loss = 1.1229e-01, PNorm = 82.0822, GNorm = 0.5014, lr_0 = 9.6957e-04
Loss = 1.0895e-01, PNorm = 82.1921, GNorm = 0.5622, lr_0 = 9.6890e-04
Loss = 1.0513e-01, PNorm = 82.2865, GNorm = 0.5700, lr_0 = 9.6824e-04
Loss = 1.0184e-01, PNorm = 82.3947, GNorm = 0.9685, lr_0 = 9.6757e-04
Loss = 9.9141e-02, PNorm = 82.4874, GNorm = 0.7121, lr_0 = 9.6691e-04
Loss = 1.1305e-01, PNorm = 82.5977, GNorm = 0.4745, lr_0 = 9.6625e-04
Loss = 1.0692e-01, PNorm = 82.7117, GNorm = 0.6291, lr_0 = 9.6559e-04
Loss = 1.1747e-01, PNorm = 82.8172, GNorm = 0.7664, lr_0 = 9.6493e-04
Loss = 1.1567e-01, PNorm = 82.9276, GNorm = 0.6517, lr_0 = 9.6427e-04
Loss = 1.1414e-01, PNorm = 83.0445, GNorm = 0.7965, lr_0 = 9.6360e-04
Loss = 1.2057e-01, PNorm = 83.1622, GNorm = 0.7544, lr_0 = 9.6294e-04
Loss = 9.9936e-02, PNorm = 83.2770, GNorm = 0.5775, lr_0 = 9.6228e-04
Loss = 1.0441e-01, PNorm = 83.3857, GNorm = 0.5104, lr_0 = 9.6163e-04
Loss = 1.0779e-01, PNorm = 83.4957, GNorm = 0.6503, lr_0 = 9.6097e-04
Loss = 1.0303e-01, PNorm = 83.6179, GNorm = 0.4698, lr_0 = 9.6031e-04
Loss = 1.0881e-01, PNorm = 83.7229, GNorm = 0.4640, lr_0 = 9.5965e-04
Loss = 1.0442e-01, PNorm = 83.8167, GNorm = 0.5502, lr_0 = 9.5899e-04
Loss = 1.1423e-01, PNorm = 83.9182, GNorm = 0.7518, lr_0 = 9.5834e-04
Loss = 1.0759e-01, PNorm = 84.0160, GNorm = 0.6786, lr_0 = 9.5768e-04
Loss = 1.1354e-01, PNorm = 84.1195, GNorm = 0.8053, lr_0 = 9.5702e-04
Loss = 1.0999e-01, PNorm = 84.2227, GNorm = 0.7445, lr_0 = 9.5637e-04
Loss = 1.1657e-01, PNorm = 84.3399, GNorm = 0.7604, lr_0 = 9.5571e-04
Loss = 1.1001e-01, PNorm = 84.4460, GNorm = 0.6520, lr_0 = 9.5506e-04
Loss = 1.1846e-01, PNorm = 84.5411, GNorm = 0.8169, lr_0 = 9.5440e-04
Loss = 1.0664e-01, PNorm = 84.6434, GNorm = 1.5144, lr_0 = 9.5375e-04
Loss = 1.1339e-01, PNorm = 84.7466, GNorm = 0.7432, lr_0 = 9.5310e-04
Loss = 1.0989e-01, PNorm = 84.8627, GNorm = 0.6301, lr_0 = 9.5244e-04
Loss = 1.2681e-01, PNorm = 84.9716, GNorm = 0.7475, lr_0 = 9.5179e-04
Loss = 1.2290e-01, PNorm = 85.0789, GNorm = 0.5512, lr_0 = 9.5114e-04
Loss = 1.1612e-01, PNorm = 85.1934, GNorm = 0.5409, lr_0 = 9.5049e-04
Loss = 1.1704e-01, PNorm = 85.3027, GNorm = 0.9595, lr_0 = 9.4984e-04
Loss = 1.1499e-01, PNorm = 85.4105, GNorm = 0.5534, lr_0 = 9.4919e-04
Loss = 1.2584e-01, PNorm = 85.5110, GNorm = 0.4002, lr_0 = 9.4854e-04
Loss = 1.1396e-01, PNorm = 85.6213, GNorm = 0.8636, lr_0 = 9.4789e-04
Loss = 1.1097e-01, PNorm = 85.7175, GNorm = 0.8973, lr_0 = 9.4724e-04
Loss = 1.0714e-01, PNorm = 85.8158, GNorm = 0.4873, lr_0 = 9.4659e-04
Loss = 1.1544e-01, PNorm = 85.9245, GNorm = 0.6851, lr_0 = 9.4594e-04
Loss = 1.1819e-01, PNorm = 86.0287, GNorm = 0.5529, lr_0 = 9.4529e-04
Loss = 1.2637e-01, PNorm = 86.1443, GNorm = 1.0732, lr_0 = 9.4464e-04
Loss = 1.2084e-01, PNorm = 86.2561, GNorm = 0.7410, lr_0 = 9.4400e-04
Loss = 1.0530e-01, PNorm = 86.3805, GNorm = 0.4944, lr_0 = 9.4335e-04
Loss = 1.2004e-01, PNorm = 86.4984, GNorm = 0.8922, lr_0 = 9.4270e-04
Loss = 1.1010e-01, PNorm = 86.6137, GNorm = 0.7806, lr_0 = 9.4206e-04
Loss = 1.0745e-01, PNorm = 86.7190, GNorm = 0.5802, lr_0 = 9.4141e-04
Loss = 1.0898e-01, PNorm = 86.8239, GNorm = 0.9377, lr_0 = 9.4077e-04
Loss = 1.2030e-01, PNorm = 86.9291, GNorm = 0.6751, lr_0 = 9.4012e-04
Loss = 1.2003e-01, PNorm = 87.0452, GNorm = 0.6647, lr_0 = 9.3948e-04
Loss = 1.1284e-01, PNorm = 87.1523, GNorm = 0.6879, lr_0 = 9.3884e-04
Loss = 1.2110e-01, PNorm = 87.2557, GNorm = 0.5358, lr_0 = 9.3819e-04
Loss = 1.2790e-01, PNorm = 87.3730, GNorm = 1.7504, lr_0 = 9.3755e-04
Loss = 1.1304e-01, PNorm = 87.4772, GNorm = 1.0909, lr_0 = 9.3691e-04
Loss = 1.3069e-01, PNorm = 87.5842, GNorm = 0.5077, lr_0 = 9.3627e-04
Loss = 1.0557e-01, PNorm = 87.6965, GNorm = 0.9537, lr_0 = 9.3562e-04
Loss = 1.1884e-01, PNorm = 87.8037, GNorm = 1.0194, lr_0 = 9.3498e-04
Loss = 1.1302e-01, PNorm = 87.9070, GNorm = 0.5243, lr_0 = 9.3434e-04
Loss = 1.1167e-01, PNorm = 88.0079, GNorm = 0.4539, lr_0 = 9.3370e-04
Loss = 1.1948e-01, PNorm = 88.1146, GNorm = 0.6355, lr_0 = 9.3306e-04
Loss = 1.1006e-01, PNorm = 88.2194, GNorm = 1.1942, lr_0 = 9.3242e-04
Loss = 1.1301e-01, PNorm = 88.3408, GNorm = 0.7852, lr_0 = 9.3178e-04
Loss = 1.2458e-01, PNorm = 88.4523, GNorm = 0.8997, lr_0 = 9.3115e-04
Loss = 1.1029e-01, PNorm = 88.5526, GNorm = 0.5394, lr_0 = 9.3051e-04
Loss = 1.1386e-01, PNorm = 88.6587, GNorm = 0.4521, lr_0 = 9.2987e-04
Loss = 1.3532e-01, PNorm = 88.7660, GNorm = 0.7664, lr_0 = 9.2923e-04
Loss = 1.0136e-01, PNorm = 88.8746, GNorm = 0.7599, lr_0 = 9.2860e-04
Loss = 1.1561e-01, PNorm = 88.9791, GNorm = 0.5194, lr_0 = 9.2796e-04
Loss = 1.1224e-01, PNorm = 89.0878, GNorm = 1.1763, lr_0 = 9.2733e-04
Loss = 1.2033e-01, PNorm = 89.1878, GNorm = 0.8874, lr_0 = 9.2669e-04
Loss = 1.3652e-01, PNorm = 89.2936, GNorm = 0.6633, lr_0 = 9.2606e-04
Loss = 1.1989e-01, PNorm = 89.4144, GNorm = 0.9987, lr_0 = 9.2542e-04
Loss = 1.2177e-01, PNorm = 89.5141, GNorm = 0.8455, lr_0 = 9.2479e-04
Loss = 1.2848e-01, PNorm = 89.6162, GNorm = 1.1281, lr_0 = 9.2415e-04
Loss = 1.2250e-01, PNorm = 89.7228, GNorm = 0.6046, lr_0 = 9.2352e-04
Loss = 1.0279e-01, PNorm = 89.8289, GNorm = 0.6525, lr_0 = 9.2289e-04
Loss = 1.2234e-01, PNorm = 89.9385, GNorm = 0.7578, lr_0 = 9.2226e-04
Loss = 1.1703e-01, PNorm = 90.0413, GNorm = 0.6257, lr_0 = 9.2162e-04
Loss = 1.1455e-01, PNorm = 90.1418, GNorm = 0.5068, lr_0 = 9.2099e-04
Validation mae = 0.514035
Epoch 3
Loss = 7.2764e-02, PNorm = 90.2440, GNorm = 0.6943, lr_0 = 9.2036e-04
Loss = 6.9357e-02, PNorm = 90.3162, GNorm = 0.3363, lr_0 = 9.1973e-04
Loss = 6.8650e-02, PNorm = 90.3797, GNorm = 0.3770, lr_0 = 9.1910e-04
Loss = 6.6264e-02, PNorm = 90.4505, GNorm = 0.7018, lr_0 = 9.1847e-04
Loss = 7.8848e-02, PNorm = 90.5144, GNorm = 0.4437, lr_0 = 9.1784e-04
Loss = 6.5028e-02, PNorm = 90.5804, GNorm = 0.3105, lr_0 = 9.1721e-04
Loss = 6.0229e-02, PNorm = 90.6425, GNorm = 0.9553, lr_0 = 9.1658e-04
Loss = 7.1280e-02, PNorm = 90.7030, GNorm = 0.6321, lr_0 = 9.1596e-04
Loss = 5.7041e-02, PNorm = 90.7708, GNorm = 0.3760, lr_0 = 9.1533e-04
Loss = 6.6271e-02, PNorm = 90.8290, GNorm = 0.5101, lr_0 = 9.1470e-04
Loss = 6.0444e-02, PNorm = 90.8840, GNorm = 0.4539, lr_0 = 9.1408e-04
Loss = 6.3018e-02, PNorm = 90.9432, GNorm = 0.8555, lr_0 = 9.1345e-04
Loss = 5.9767e-02, PNorm = 91.0032, GNorm = 0.5779, lr_0 = 9.1282e-04
Loss = 5.8188e-02, PNorm = 91.0649, GNorm = 0.2873, lr_0 = 9.1220e-04
Loss = 6.8189e-02, PNorm = 91.1201, GNorm = 0.4877, lr_0 = 9.1157e-04
Loss = 7.2923e-02, PNorm = 91.1966, GNorm = 0.3485, lr_0 = 9.1095e-04
Loss = 7.4086e-02, PNorm = 91.2647, GNorm = 0.7217, lr_0 = 9.1032e-04
Loss = 6.2348e-02, PNorm = 91.3430, GNorm = 0.9220, lr_0 = 9.0970e-04
Loss = 7.5425e-02, PNorm = 91.4049, GNorm = 0.4669, lr_0 = 9.0908e-04
Loss = 6.7264e-02, PNorm = 91.4732, GNorm = 0.3693, lr_0 = 9.0846e-04
Loss = 7.0510e-02, PNorm = 91.5498, GNorm = 0.8679, lr_0 = 9.0783e-04
Loss = 6.9926e-02, PNorm = 91.6293, GNorm = 0.6974, lr_0 = 9.0721e-04
Loss = 7.6614e-02, PNorm = 91.7009, GNorm = 0.3700, lr_0 = 9.0659e-04
Loss = 6.8156e-02, PNorm = 91.7844, GNorm = 0.6933, lr_0 = 9.0597e-04
Loss = 6.8627e-02, PNorm = 91.8547, GNorm = 0.8995, lr_0 = 9.0535e-04
Loss = 6.5177e-02, PNorm = 91.9307, GNorm = 0.6257, lr_0 = 9.0473e-04
Loss = 6.8371e-02, PNorm = 91.9999, GNorm = 0.7564, lr_0 = 9.0411e-04
Loss = 7.0325e-02, PNorm = 92.0631, GNorm = 0.3685, lr_0 = 9.0349e-04
Loss = 7.2400e-02, PNorm = 92.1274, GNorm = 0.4673, lr_0 = 9.0287e-04
Loss = 6.6118e-02, PNorm = 92.1960, GNorm = 1.0487, lr_0 = 9.0225e-04
Loss = 6.7332e-02, PNorm = 92.2634, GNorm = 0.6279, lr_0 = 9.0163e-04
Loss = 6.4018e-02, PNorm = 92.3349, GNorm = 0.3378, lr_0 = 9.0102e-04
Loss = 6.1684e-02, PNorm = 92.4023, GNorm = 0.5402, lr_0 = 9.0040e-04
Loss = 6.9074e-02, PNorm = 92.4795, GNorm = 0.7546, lr_0 = 8.9978e-04
Loss = 6.1956e-02, PNorm = 92.5580, GNorm = 0.3921, lr_0 = 8.9916e-04
Loss = 7.3077e-02, PNorm = 92.6311, GNorm = 0.5858, lr_0 = 8.9855e-04
Loss = 7.0459e-02, PNorm = 92.7067, GNorm = 0.7901, lr_0 = 8.9793e-04
Loss = 6.5168e-02, PNorm = 92.7854, GNorm = 0.3129, lr_0 = 8.9732e-04
Loss = 6.6466e-02, PNorm = 92.8669, GNorm = 0.3475, lr_0 = 8.9670e-04
Loss = 6.7683e-02, PNorm = 92.9364, GNorm = 0.5991, lr_0 = 8.9609e-04
Loss = 5.9620e-02, PNorm = 93.0043, GNorm = 0.4003, lr_0 = 8.9548e-04
Loss = 5.6375e-02, PNorm = 93.0792, GNorm = 0.7324, lr_0 = 8.9486e-04
Loss = 6.1633e-02, PNorm = 93.1479, GNorm = 0.7178, lr_0 = 8.9425e-04
Loss = 7.1136e-02, PNorm = 93.2209, GNorm = 0.4637, lr_0 = 8.9364e-04
Loss = 6.3103e-02, PNorm = 93.3025, GNorm = 0.4311, lr_0 = 8.9302e-04
Loss = 6.5565e-02, PNorm = 93.3712, GNorm = 0.3845, lr_0 = 8.9241e-04
Loss = 7.1358e-02, PNorm = 93.4520, GNorm = 0.5359, lr_0 = 8.9180e-04
Loss = 6.8130e-02, PNorm = 93.5180, GNorm = 0.4230, lr_0 = 8.9119e-04
Loss = 6.6204e-02, PNorm = 93.5942, GNorm = 0.8321, lr_0 = 8.9058e-04
Loss = 7.9616e-02, PNorm = 93.6695, GNorm = 0.4081, lr_0 = 8.8997e-04
Loss = 7.1163e-02, PNorm = 93.7548, GNorm = 0.6277, lr_0 = 8.8936e-04
Loss = 7.2205e-02, PNorm = 93.8308, GNorm = 0.5303, lr_0 = 8.8875e-04
Loss = 7.2847e-02, PNorm = 93.9128, GNorm = 0.6208, lr_0 = 8.8814e-04
Loss = 6.2114e-02, PNorm = 94.0004, GNorm = 0.4295, lr_0 = 8.8753e-04
Loss = 6.9549e-02, PNorm = 94.0809, GNorm = 0.5091, lr_0 = 8.8693e-04
Loss = 6.3557e-02, PNorm = 94.1576, GNorm = 0.4427, lr_0 = 8.8632e-04
Loss = 7.7471e-02, PNorm = 94.2448, GNorm = 0.4408, lr_0 = 8.8571e-04
Loss = 6.8576e-02, PNorm = 94.3262, GNorm = 0.9070, lr_0 = 8.8510e-04
Loss = 6.1712e-02, PNorm = 94.3987, GNorm = 0.3474, lr_0 = 8.8450e-04
Loss = 6.8029e-02, PNorm = 94.5004, GNorm = 0.4136, lr_0 = 8.8389e-04
Loss = 7.4914e-02, PNorm = 94.5824, GNorm = 0.7586, lr_0 = 8.8329e-04
Loss = 6.6122e-02, PNorm = 94.6646, GNorm = 0.2806, lr_0 = 8.8268e-04
Loss = 7.2012e-02, PNorm = 94.7577, GNorm = 0.5913, lr_0 = 8.8208e-04
Loss = 8.1334e-02, PNorm = 94.8411, GNorm = 0.9033, lr_0 = 8.8147e-04
Loss = 6.6740e-02, PNorm = 94.9292, GNorm = 0.3706, lr_0 = 8.8087e-04
Loss = 9.0467e-02, PNorm = 95.0234, GNorm = 0.4726, lr_0 = 8.8026e-04
Loss = 7.1018e-02, PNorm = 95.1135, GNorm = 0.4521, lr_0 = 8.7966e-04
Loss = 8.9536e-02, PNorm = 95.1989, GNorm = 1.5874, lr_0 = 8.7906e-04
Loss = 8.2348e-02, PNorm = 95.2967, GNorm = 0.6578, lr_0 = 8.7846e-04
Loss = 7.5534e-02, PNorm = 95.3880, GNorm = 0.4316, lr_0 = 8.7785e-04
Loss = 7.7973e-02, PNorm = 95.4878, GNorm = 0.5487, lr_0 = 8.7725e-04
Loss = 7.9294e-02, PNorm = 95.5748, GNorm = 0.4788, lr_0 = 8.7665e-04
Loss = 6.7662e-02, PNorm = 95.6532, GNorm = 0.5380, lr_0 = 8.7605e-04
Loss = 7.6154e-02, PNorm = 95.7294, GNorm = 0.5354, lr_0 = 8.7545e-04
Loss = 6.9872e-02, PNorm = 95.8148, GNorm = 0.5939, lr_0 = 8.7485e-04
Loss = 6.9594e-02, PNorm = 95.9036, GNorm = 0.3745, lr_0 = 8.7425e-04
Loss = 6.7105e-02, PNorm = 95.9822, GNorm = 0.4507, lr_0 = 8.7365e-04
Loss = 6.6510e-02, PNorm = 96.0582, GNorm = 0.4773, lr_0 = 8.7306e-04
Loss = 6.5400e-02, PNorm = 96.1347, GNorm = 0.6589, lr_0 = 8.7246e-04
Loss = 7.5693e-02, PNorm = 96.2110, GNorm = 0.4444, lr_0 = 8.7186e-04
Loss = 6.4559e-02, PNorm = 96.2978, GNorm = 1.1044, lr_0 = 8.7126e-04
Loss = 7.0285e-02, PNorm = 96.3743, GNorm = 0.5859, lr_0 = 8.7067e-04
Loss = 8.1045e-02, PNorm = 96.4525, GNorm = 0.6137, lr_0 = 8.7007e-04
Loss = 7.2636e-02, PNorm = 96.5410, GNorm = 0.6193, lr_0 = 8.6947e-04
Loss = 7.8452e-02, PNorm = 96.6142, GNorm = 0.4093, lr_0 = 8.6888e-04
Loss = 7.4835e-02, PNorm = 96.6995, GNorm = 0.4722, lr_0 = 8.6828e-04
Loss = 7.4775e-02, PNorm = 96.7820, GNorm = 0.5778, lr_0 = 8.6769e-04
Loss = 7.4554e-02, PNorm = 96.8737, GNorm = 0.4431, lr_0 = 8.6709e-04
Loss = 7.6375e-02, PNorm = 96.9568, GNorm = 0.7683, lr_0 = 8.6650e-04
Loss = 8.4699e-02, PNorm = 97.0478, GNorm = 0.4436, lr_0 = 8.6590e-04
Loss = 7.5375e-02, PNorm = 97.1342, GNorm = 0.5027, lr_0 = 8.6531e-04
Loss = 7.5352e-02, PNorm = 97.2290, GNorm = 0.6131, lr_0 = 8.6472e-04
Loss = 8.7597e-02, PNorm = 97.3086, GNorm = 0.7190, lr_0 = 8.6413e-04
Loss = 7.1160e-02, PNorm = 97.4087, GNorm = 0.3546, lr_0 = 8.6353e-04
Loss = 6.9916e-02, PNorm = 97.4933, GNorm = 0.5246, lr_0 = 8.6294e-04
Loss = 7.7695e-02, PNorm = 97.5730, GNorm = 0.9900, lr_0 = 8.6235e-04
Loss = 7.4953e-02, PNorm = 97.6638, GNorm = 0.8008, lr_0 = 8.6176e-04
Loss = 7.6639e-02, PNorm = 97.7475, GNorm = 0.3254, lr_0 = 8.6117e-04
Loss = 7.3752e-02, PNorm = 97.8426, GNorm = 0.7569, lr_0 = 8.6058e-04
Loss = 8.1207e-02, PNorm = 97.9331, GNorm = 0.5208, lr_0 = 8.5999e-04
Loss = 7.6665e-02, PNorm = 98.0127, GNorm = 0.4480, lr_0 = 8.5940e-04
Loss = 8.0003e-02, PNorm = 98.1007, GNorm = 0.4890, lr_0 = 8.5881e-04
Loss = 7.5961e-02, PNorm = 98.1812, GNorm = 0.8282, lr_0 = 8.5823e-04
Loss = 8.0010e-02, PNorm = 98.2622, GNorm = 0.5293, lr_0 = 8.5764e-04
Loss = 7.3081e-02, PNorm = 98.3500, GNorm = 0.3959, lr_0 = 8.5705e-04
Loss = 7.7500e-02, PNorm = 98.4425, GNorm = 1.0351, lr_0 = 8.5646e-04
Loss = 7.8899e-02, PNorm = 98.5297, GNorm = 0.6613, lr_0 = 8.5588e-04
Loss = 8.3423e-02, PNorm = 98.6308, GNorm = 1.1488, lr_0 = 8.5529e-04
Loss = 7.5697e-02, PNorm = 98.7264, GNorm = 0.5394, lr_0 = 8.5470e-04
Loss = 7.3132e-02, PNorm = 98.8196, GNorm = 0.5271, lr_0 = 8.5412e-04
Loss = 7.5010e-02, PNorm = 98.9015, GNorm = 0.6126, lr_0 = 8.5353e-04
Loss = 8.7371e-02, PNorm = 98.9910, GNorm = 0.4999, lr_0 = 8.5295e-04
Loss = 6.9324e-02, PNorm = 99.0768, GNorm = 0.5500, lr_0 = 8.5236e-04
Loss = 8.8695e-02, PNorm = 99.1742, GNorm = 1.0216, lr_0 = 8.5178e-04
Loss = 7.9257e-02, PNorm = 99.2794, GNorm = 0.6584, lr_0 = 8.5120e-04
Loss = 7.6173e-02, PNorm = 99.3672, GNorm = 0.3664, lr_0 = 8.5061e-04
Loss = 7.9665e-02, PNorm = 99.4566, GNorm = 0.5609, lr_0 = 8.5003e-04
Loss = 6.9004e-02, PNorm = 99.5533, GNorm = 0.7532, lr_0 = 8.4945e-04
Loss = 7.2030e-02, PNorm = 99.6416, GNorm = 0.3719, lr_0 = 8.4887e-04
Loss = 7.0620e-02, PNorm = 99.7293, GNorm = 0.3951, lr_0 = 8.4828e-04
Validation mae = 0.498729
Epoch 4
Loss = 5.7515e-02, PNorm = 99.8102, GNorm = 0.7363, lr_0 = 8.4770e-04
Loss = 6.1751e-02, PNorm = 99.8817, GNorm = 0.8321, lr_0 = 8.4712e-04
Loss = 5.2293e-02, PNorm = 99.9503, GNorm = 0.3214, lr_0 = 8.4654e-04
Loss = 4.8068e-02, PNorm = 100.0124, GNorm = 0.5771, lr_0 = 8.4596e-04
Loss = 4.8552e-02, PNorm = 100.0716, GNorm = 0.4115, lr_0 = 8.4538e-04
Loss = 4.5342e-02, PNorm = 100.1264, GNorm = 0.4977, lr_0 = 8.4480e-04
Loss = 4.8748e-02, PNorm = 100.1740, GNorm = 0.5861, lr_0 = 8.4423e-04
Loss = 5.1029e-02, PNorm = 100.2298, GNorm = 0.2694, lr_0 = 8.4365e-04
Loss = 4.1915e-02, PNorm = 100.2860, GNorm = 0.4685, lr_0 = 8.4307e-04
Loss = 3.8789e-02, PNorm = 100.3316, GNorm = 0.3532, lr_0 = 8.4249e-04
Loss = 4.7732e-02, PNorm = 100.3848, GNorm = 0.3657, lr_0 = 8.4191e-04
Loss = 4.4205e-02, PNorm = 100.4355, GNorm = 0.4939, lr_0 = 8.4134e-04
Loss = 4.4974e-02, PNorm = 100.4914, GNorm = 0.4206, lr_0 = 8.4076e-04
Loss = 4.7845e-02, PNorm = 100.5328, GNorm = 0.5204, lr_0 = 8.4019e-04
Loss = 4.6039e-02, PNorm = 100.5828, GNorm = 0.5401, lr_0 = 8.3961e-04
Loss = 4.8559e-02, PNorm = 100.6394, GNorm = 0.3488, lr_0 = 8.3903e-04
Loss = 4.5626e-02, PNorm = 100.6933, GNorm = 0.5455, lr_0 = 8.3846e-04
Loss = 4.2794e-02, PNorm = 100.7541, GNorm = 0.5619, lr_0 = 8.3789e-04
Loss = 4.4299e-02, PNorm = 100.8108, GNorm = 0.7188, lr_0 = 8.3731e-04
Loss = 4.8656e-02, PNorm = 100.8597, GNorm = 0.3469, lr_0 = 8.3674e-04
Loss = 4.8162e-02, PNorm = 100.9162, GNorm = 0.4157, lr_0 = 8.3616e-04
Loss = 4.4618e-02, PNorm = 100.9678, GNorm = 0.3668, lr_0 = 8.3559e-04
Loss = 4.7421e-02, PNorm = 101.0273, GNorm = 0.3518, lr_0 = 8.3502e-04
Loss = 4.4889e-02, PNorm = 101.0825, GNorm = 0.3414, lr_0 = 8.3445e-04
Loss = 4.1646e-02, PNorm = 101.1511, GNorm = 0.3042, lr_0 = 8.3388e-04
Loss = 4.5625e-02, PNorm = 101.2051, GNorm = 0.6428, lr_0 = 8.3330e-04
Loss = 5.2300e-02, PNorm = 101.2591, GNorm = 0.2898, lr_0 = 8.3273e-04
Loss = 4.4383e-02, PNorm = 101.3140, GNorm = 0.6031, lr_0 = 8.3216e-04
Loss = 4.5842e-02, PNorm = 101.3708, GNorm = 0.5284, lr_0 = 8.3159e-04
Loss = 4.3750e-02, PNorm = 101.4301, GNorm = 0.3439, lr_0 = 8.3102e-04
Loss = 4.8756e-02, PNorm = 101.4846, GNorm = 0.5177, lr_0 = 8.3045e-04
Loss = 4.1007e-02, PNorm = 101.5448, GNorm = 0.3989, lr_0 = 8.2988e-04
Loss = 4.4815e-02, PNorm = 101.6004, GNorm = 0.2923, lr_0 = 8.2932e-04
Loss = 4.4322e-02, PNorm = 101.6560, GNorm = 0.2534, lr_0 = 8.2875e-04
Loss = 4.9100e-02, PNorm = 101.7162, GNorm = 1.2138, lr_0 = 8.2818e-04
Loss = 4.6873e-02, PNorm = 101.7677, GNorm = 0.5167, lr_0 = 8.2761e-04
Loss = 5.1762e-02, PNorm = 101.8363, GNorm = 0.4456, lr_0 = 8.2705e-04
Loss = 4.2349e-02, PNorm = 101.9002, GNorm = 0.4758, lr_0 = 8.2648e-04
Loss = 4.0522e-02, PNorm = 101.9577, GNorm = 0.4321, lr_0 = 8.2591e-04
Loss = 4.9251e-02, PNorm = 102.0160, GNorm = 0.3920, lr_0 = 8.2535e-04
Loss = 4.3366e-02, PNorm = 102.0747, GNorm = 0.2892, lr_0 = 8.2478e-04
Loss = 5.0132e-02, PNorm = 102.1335, GNorm = 0.3268, lr_0 = 8.2422e-04
Loss = 5.1229e-02, PNorm = 102.1924, GNorm = 0.3069, lr_0 = 8.2365e-04
Loss = 4.8292e-02, PNorm = 102.2700, GNorm = 0.3576, lr_0 = 8.2309e-04
Loss = 4.5802e-02, PNorm = 102.3408, GNorm = 0.4401, lr_0 = 8.2252e-04
Loss = 4.9680e-02, PNorm = 102.4172, GNorm = 0.4161, lr_0 = 8.2196e-04
Loss = 5.0740e-02, PNorm = 102.4809, GNorm = 0.4938, lr_0 = 8.2140e-04
Loss = 5.7969e-02, PNorm = 102.5442, GNorm = 0.4839, lr_0 = 8.2084e-04
Loss = 4.9200e-02, PNorm = 102.6148, GNorm = 0.5542, lr_0 = 8.2027e-04
Loss = 4.3430e-02, PNorm = 102.6797, GNorm = 0.3873, lr_0 = 8.1971e-04
Loss = 4.0022e-02, PNorm = 102.7468, GNorm = 0.4181, lr_0 = 8.1915e-04
Loss = 4.3175e-02, PNorm = 102.8072, GNorm = 0.5661, lr_0 = 8.1859e-04
Loss = 4.4545e-02, PNorm = 102.8645, GNorm = 0.4596, lr_0 = 8.1803e-04
Loss = 5.2220e-02, PNorm = 102.9299, GNorm = 0.4065, lr_0 = 8.1747e-04
Loss = 4.9456e-02, PNorm = 102.9944, GNorm = 0.3927, lr_0 = 8.1691e-04
Loss = 4.7487e-02, PNorm = 103.0568, GNorm = 0.5191, lr_0 = 8.1635e-04
Loss = 4.5104e-02, PNorm = 103.1221, GNorm = 0.4077, lr_0 = 8.1579e-04
Loss = 5.8960e-02, PNorm = 103.1877, GNorm = 0.6459, lr_0 = 8.1523e-04
Loss = 5.0143e-02, PNorm = 103.2491, GNorm = 0.3137, lr_0 = 8.1467e-04
Loss = 4.9307e-02, PNorm = 103.3158, GNorm = 0.2681, lr_0 = 8.1411e-04
Loss = 4.4908e-02, PNorm = 103.3845, GNorm = 0.3845, lr_0 = 8.1355e-04
Loss = 4.7879e-02, PNorm = 103.4545, GNorm = 0.3258, lr_0 = 8.1300e-04
Loss = 4.4868e-02, PNorm = 103.5230, GNorm = 0.4310, lr_0 = 8.1244e-04
Loss = 5.4398e-02, PNorm = 103.5914, GNorm = 0.3698, lr_0 = 8.1188e-04
Loss = 4.8282e-02, PNorm = 103.6621, GNorm = 0.3314, lr_0 = 8.1133e-04
Loss = 4.6871e-02, PNorm = 103.7291, GNorm = 0.4490, lr_0 = 8.1077e-04
Loss = 4.1212e-02, PNorm = 103.8096, GNorm = 0.4832, lr_0 = 8.1022e-04
Loss = 5.4906e-02, PNorm = 103.8859, GNorm = 0.6278, lr_0 = 8.0966e-04
Loss = 4.5331e-02, PNorm = 103.9656, GNorm = 0.3595, lr_0 = 8.0911e-04
Loss = 4.9658e-02, PNorm = 104.0230, GNorm = 0.3891, lr_0 = 8.0855e-04
Loss = 5.5135e-02, PNorm = 104.0869, GNorm = 0.5568, lr_0 = 8.0800e-04
Loss = 4.5681e-02, PNorm = 104.1577, GNorm = 0.5989, lr_0 = 8.0745e-04
Loss = 4.6659e-02, PNorm = 104.2290, GNorm = 0.9235, lr_0 = 8.0689e-04
Loss = 5.9210e-02, PNorm = 104.3139, GNorm = 0.5730, lr_0 = 8.0634e-04
Loss = 5.4257e-02, PNorm = 104.3978, GNorm = 0.6211, lr_0 = 8.0579e-04
Loss = 4.8315e-02, PNorm = 104.4702, GNorm = 0.4354, lr_0 = 8.0523e-04
Loss = 5.9990e-02, PNorm = 104.5471, GNorm = 0.3676, lr_0 = 8.0468e-04
Loss = 5.5556e-02, PNorm = 104.6267, GNorm = 0.3904, lr_0 = 8.0413e-04
Loss = 4.6761e-02, PNorm = 104.6994, GNorm = 0.3832, lr_0 = 8.0358e-04
Loss = 5.3473e-02, PNorm = 104.7745, GNorm = 0.3782, lr_0 = 8.0303e-04
Loss = 4.4458e-02, PNorm = 104.8406, GNorm = 0.5217, lr_0 = 8.0248e-04
Loss = 5.6967e-02, PNorm = 104.9096, GNorm = 1.0874, lr_0 = 8.0193e-04
Loss = 5.5320e-02, PNorm = 104.9759, GNorm = 0.4197, lr_0 = 8.0138e-04
Loss = 5.2522e-02, PNorm = 105.0427, GNorm = 0.3778, lr_0 = 8.0083e-04
Loss = 5.0251e-02, PNorm = 105.1281, GNorm = 0.7345, lr_0 = 8.0028e-04
Loss = 5.3384e-02, PNorm = 105.2054, GNorm = 0.3468, lr_0 = 7.9974e-04
Loss = 4.8878e-02, PNorm = 105.2879, GNorm = 0.4251, lr_0 = 7.9919e-04
Loss = 4.9980e-02, PNorm = 105.3581, GNorm = 0.5789, lr_0 = 7.9864e-04
Loss = 5.3930e-02, PNorm = 105.4319, GNorm = 0.4621, lr_0 = 7.9809e-04
Loss = 4.5544e-02, PNorm = 105.5011, GNorm = 0.4452, lr_0 = 7.9755e-04
Loss = 5.2696e-02, PNorm = 105.5718, GNorm = 0.3672, lr_0 = 7.9700e-04
Loss = 4.7537e-02, PNorm = 105.6473, GNorm = 0.9058, lr_0 = 7.9645e-04
Loss = 5.8529e-02, PNorm = 105.7173, GNorm = 0.6964, lr_0 = 7.9591e-04
Loss = 6.1456e-02, PNorm = 105.7822, GNorm = 0.8324, lr_0 = 7.9536e-04
Loss = 5.1551e-02, PNorm = 105.8571, GNorm = 0.4970, lr_0 = 7.9482e-04
Loss = 5.1071e-02, PNorm = 105.9290, GNorm = 0.2633, lr_0 = 7.9427e-04
Loss = 5.5974e-02, PNorm = 106.0071, GNorm = 0.3613, lr_0 = 7.9373e-04
Loss = 5.2464e-02, PNorm = 106.0915, GNorm = 0.3241, lr_0 = 7.9319e-04
Loss = 5.5319e-02, PNorm = 106.1672, GNorm = 0.5931, lr_0 = 7.9264e-04
Loss = 5.0765e-02, PNorm = 106.2503, GNorm = 0.3145, lr_0 = 7.9210e-04
Loss = 4.7418e-02, PNorm = 106.3142, GNorm = 0.8435, lr_0 = 7.9156e-04
Loss = 4.8439e-02, PNorm = 106.3817, GNorm = 0.4632, lr_0 = 7.9101e-04
Loss = 4.9458e-02, PNorm = 106.4526, GNorm = 0.5165, lr_0 = 7.9047e-04
Loss = 5.0921e-02, PNorm = 106.5245, GNorm = 0.4919, lr_0 = 7.8993e-04
Loss = 5.3613e-02, PNorm = 106.5965, GNorm = 0.5754, lr_0 = 7.8939e-04
Loss = 5.8860e-02, PNorm = 106.6719, GNorm = 0.6948, lr_0 = 7.8885e-04
Loss = 5.0411e-02, PNorm = 106.7421, GNorm = 0.3173, lr_0 = 7.8831e-04
Loss = 4.6383e-02, PNorm = 106.8197, GNorm = 0.4668, lr_0 = 7.8777e-04
Loss = 5.8109e-02, PNorm = 106.8897, GNorm = 0.3008, lr_0 = 7.8723e-04
Loss = 5.1644e-02, PNorm = 106.9797, GNorm = 0.5697, lr_0 = 7.8669e-04
Loss = 4.9035e-02, PNorm = 107.0718, GNorm = 0.5775, lr_0 = 7.8615e-04
Loss = 4.7786e-02, PNorm = 107.1472, GNorm = 0.5752, lr_0 = 7.8561e-04
Loss = 5.5501e-02, PNorm = 107.2235, GNorm = 0.3376, lr_0 = 7.8507e-04
Loss = 6.3389e-02, PNorm = 107.3025, GNorm = 0.6669, lr_0 = 7.8454e-04
Loss = 6.1919e-02, PNorm = 107.3890, GNorm = 0.4835, lr_0 = 7.8400e-04
Loss = 5.6697e-02, PNorm = 107.4711, GNorm = 0.3319, lr_0 = 7.8346e-04
Loss = 5.1791e-02, PNorm = 107.5546, GNorm = 0.4649, lr_0 = 7.8293e-04
Loss = 5.4338e-02, PNorm = 107.6438, GNorm = 0.5177, lr_0 = 7.8239e-04
Loss = 5.4828e-02, PNorm = 107.7172, GNorm = 0.6314, lr_0 = 7.8185e-04
Loss = 4.8343e-02, PNorm = 107.7920, GNorm = 0.3264, lr_0 = 7.8132e-04
Validation mae = 0.498223
Epoch 5
Loss = 3.7078e-02, PNorm = 107.8453, GNorm = 0.4513, lr_0 = 7.8078e-04
Loss = 3.5326e-02, PNorm = 107.8971, GNorm = 0.2046, lr_0 = 7.8025e-04
Loss = 3.7659e-02, PNorm = 107.9521, GNorm = 0.6565, lr_0 = 7.7971e-04
Loss = 3.6566e-02, PNorm = 107.9986, GNorm = 0.8923, lr_0 = 7.7918e-04
Loss = 3.6385e-02, PNorm = 108.0409, GNorm = 0.2421, lr_0 = 7.7864e-04
Loss = 3.5467e-02, PNorm = 108.0909, GNorm = 0.2854, lr_0 = 7.7811e-04
Loss = 3.6242e-02, PNorm = 108.1411, GNorm = 0.6763, lr_0 = 7.7758e-04
Loss = 3.1043e-02, PNorm = 108.1883, GNorm = 0.5341, lr_0 = 7.7705e-04
Loss = 3.4676e-02, PNorm = 108.2347, GNorm = 0.2530, lr_0 = 7.7651e-04
Loss = 3.2832e-02, PNorm = 108.2776, GNorm = 0.3312, lr_0 = 7.7598e-04
Loss = 3.3803e-02, PNorm = 108.3231, GNorm = 0.2347, lr_0 = 7.7545e-04
Loss = 3.6737e-02, PNorm = 108.3729, GNorm = 0.3997, lr_0 = 7.7492e-04
Loss = 4.0157e-02, PNorm = 108.4247, GNorm = 0.3499, lr_0 = 7.7439e-04
Loss = 3.9848e-02, PNorm = 108.4729, GNorm = 0.3936, lr_0 = 7.7386e-04
Loss = 3.0007e-02, PNorm = 108.5169, GNorm = 0.2439, lr_0 = 7.7333e-04
Loss = 4.0409e-02, PNorm = 108.5680, GNorm = 0.8305, lr_0 = 7.7280e-04
Loss = 4.3261e-02, PNorm = 108.6257, GNorm = 0.4515, lr_0 = 7.7227e-04
Loss = 3.1669e-02, PNorm = 108.6809, GNorm = 0.2524, lr_0 = 7.7174e-04
Loss = 3.4351e-02, PNorm = 108.7399, GNorm = 0.2113, lr_0 = 7.7121e-04
Loss = 2.9570e-02, PNorm = 108.7951, GNorm = 0.3611, lr_0 = 7.7068e-04
Loss = 3.0656e-02, PNorm = 108.8479, GNorm = 0.6609, lr_0 = 7.7015e-04
Loss = 3.0167e-02, PNorm = 108.8939, GNorm = 0.2637, lr_0 = 7.6963e-04
Loss = 3.4232e-02, PNorm = 108.9402, GNorm = 0.2307, lr_0 = 7.6910e-04
Loss = 3.1953e-02, PNorm = 108.9900, GNorm = 0.3402, lr_0 = 7.6857e-04
Loss = 3.2307e-02, PNorm = 109.0496, GNorm = 0.5478, lr_0 = 7.6805e-04
Loss = 4.1422e-02, PNorm = 109.0986, GNorm = 0.4106, lr_0 = 7.6752e-04
Loss = 3.1508e-02, PNorm = 109.1469, GNorm = 0.4891, lr_0 = 7.6699e-04
Loss = 3.2675e-02, PNorm = 109.2002, GNorm = 0.3523, lr_0 = 7.6647e-04
Loss = 4.2182e-02, PNorm = 109.2560, GNorm = 0.2742, lr_0 = 7.6594e-04
Loss = 3.7664e-02, PNorm = 109.3129, GNorm = 0.5266, lr_0 = 7.6542e-04
Loss = 3.3950e-02, PNorm = 109.3626, GNorm = 0.3229, lr_0 = 7.6489e-04
Loss = 3.4977e-02, PNorm = 109.4121, GNorm = 0.2097, lr_0 = 7.6437e-04
Loss = 3.0692e-02, PNorm = 109.4612, GNorm = 0.2824, lr_0 = 7.6385e-04
Loss = 3.5329e-02, PNorm = 109.5101, GNorm = 0.5126, lr_0 = 7.6332e-04
Loss = 3.0901e-02, PNorm = 109.5676, GNorm = 0.6497, lr_0 = 7.6280e-04
Loss = 3.5760e-02, PNorm = 109.6150, GNorm = 0.4225, lr_0 = 7.6228e-04
Loss = 3.8251e-02, PNorm = 109.6618, GNorm = 0.5221, lr_0 = 7.6176e-04
Loss = 4.3519e-02, PNorm = 109.7150, GNorm = 0.3945, lr_0 = 7.6123e-04
Loss = 3.7969e-02, PNorm = 109.7731, GNorm = 0.3629, lr_0 = 7.6071e-04
Loss = 3.6086e-02, PNorm = 109.8341, GNorm = 0.4828, lr_0 = 7.6019e-04
Loss = 3.5144e-02, PNorm = 109.8967, GNorm = 0.4274, lr_0 = 7.5967e-04
Loss = 3.3689e-02, PNorm = 109.9514, GNorm = 0.2509, lr_0 = 7.5915e-04
Loss = 3.2118e-02, PNorm = 110.0047, GNorm = 0.3839, lr_0 = 7.5863e-04
Loss = 3.9417e-02, PNorm = 110.0676, GNorm = 0.2367, lr_0 = 7.5811e-04
Loss = 3.6320e-02, PNorm = 110.1302, GNorm = 0.5354, lr_0 = 7.5759e-04
Loss = 3.7807e-02, PNorm = 110.1835, GNorm = 0.3401, lr_0 = 7.5707e-04
Loss = 3.3852e-02, PNorm = 110.2424, GNorm = 0.3699, lr_0 = 7.5655e-04
Loss = 3.6358e-02, PNorm = 110.2988, GNorm = 0.4812, lr_0 = 7.5603e-04
Loss = 3.4160e-02, PNorm = 110.3576, GNorm = 0.4040, lr_0 = 7.5552e-04
Loss = 3.1184e-02, PNorm = 110.4119, GNorm = 0.1548, lr_0 = 7.5500e-04
Loss = 3.7251e-02, PNorm = 110.4695, GNorm = 0.7708, lr_0 = 7.5448e-04
Loss = 3.3248e-02, PNorm = 110.5231, GNorm = 0.4396, lr_0 = 7.5397e-04
Loss = 3.8013e-02, PNorm = 110.5794, GNorm = 0.8306, lr_0 = 7.5345e-04
Loss = 3.5376e-02, PNorm = 110.6428, GNorm = 0.7601, lr_0 = 7.5293e-04
Loss = 3.6361e-02, PNorm = 110.7015, GNorm = 0.4091, lr_0 = 7.5242e-04
Loss = 3.6116e-02, PNorm = 110.7716, GNorm = 0.4858, lr_0 = 7.5190e-04
Loss = 3.9795e-02, PNorm = 110.8375, GNorm = 0.3961, lr_0 = 7.5139e-04
Loss = 3.4646e-02, PNorm = 110.8980, GNorm = 0.2712, lr_0 = 7.5087e-04
Loss = 3.3327e-02, PNorm = 110.9594, GNorm = 0.4615, lr_0 = 7.5036e-04
Loss = 2.9027e-02, PNorm = 111.0218, GNorm = 0.4519, lr_0 = 7.4984e-04
Loss = 3.3266e-02, PNorm = 111.0772, GNorm = 0.2825, lr_0 = 7.4933e-04
Loss = 3.3754e-02, PNorm = 111.1366, GNorm = 0.4716, lr_0 = 7.4882e-04
Loss = 4.4124e-02, PNorm = 111.1983, GNorm = 0.3995, lr_0 = 7.4830e-04
Loss = 3.7657e-02, PNorm = 111.2614, GNorm = 0.2403, lr_0 = 7.4779e-04
Loss = 3.9343e-02, PNorm = 111.3184, GNorm = 0.4185, lr_0 = 7.4728e-04
Loss = 3.7221e-02, PNorm = 111.3720, GNorm = 0.5942, lr_0 = 7.4677e-04
Loss = 4.0230e-02, PNorm = 111.4279, GNorm = 0.5348, lr_0 = 7.4625e-04
Loss = 3.8413e-02, PNorm = 111.4910, GNorm = 0.8794, lr_0 = 7.4574e-04
Loss = 3.5389e-02, PNorm = 111.5522, GNorm = 0.6405, lr_0 = 7.4523e-04
Loss = 3.1146e-02, PNorm = 111.6111, GNorm = 0.6211, lr_0 = 7.4472e-04
Loss = 3.6874e-02, PNorm = 111.6748, GNorm = 0.3129, lr_0 = 7.4421e-04
Loss = 3.4514e-02, PNorm = 111.7287, GNorm = 0.5321, lr_0 = 7.4370e-04
Loss = 4.0854e-02, PNorm = 111.7845, GNorm = 0.4748, lr_0 = 7.4319e-04
Loss = 4.2681e-02, PNorm = 111.8421, GNorm = 0.4153, lr_0 = 7.4268e-04
Loss = 3.7913e-02, PNorm = 111.9068, GNorm = 0.2714, lr_0 = 7.4217e-04
Loss = 3.4826e-02, PNorm = 111.9745, GNorm = 0.3647, lr_0 = 7.4167e-04
Loss = 3.5238e-02, PNorm = 112.0450, GNorm = 0.3528, lr_0 = 7.4116e-04
Loss = 3.1652e-02, PNorm = 112.1157, GNorm = 0.2223, lr_0 = 7.4065e-04
Loss = 4.2042e-02, PNorm = 112.1762, GNorm = 0.6190, lr_0 = 7.4014e-04
Loss = 3.7532e-02, PNorm = 112.2425, GNorm = 0.3347, lr_0 = 7.3964e-04
Loss = 3.5971e-02, PNorm = 112.3155, GNorm = 0.3617, lr_0 = 7.3913e-04
Loss = 4.5022e-02, PNorm = 112.3830, GNorm = 0.4494, lr_0 = 7.3862e-04
Loss = 3.4473e-02, PNorm = 112.4436, GNorm = 0.3697, lr_0 = 7.3812e-04
Loss = 3.7240e-02, PNorm = 112.5099, GNorm = 0.2594, lr_0 = 7.3761e-04
Loss = 4.2579e-02, PNorm = 112.5840, GNorm = 0.5102, lr_0 = 7.3711e-04
Loss = 3.3878e-02, PNorm = 112.6453, GNorm = 0.2433, lr_0 = 7.3660e-04
Loss = 3.5480e-02, PNorm = 112.7124, GNorm = 0.7295, lr_0 = 7.3610e-04
Loss = 3.5976e-02, PNorm = 112.7749, GNorm = 0.5811, lr_0 = 7.3559e-04
Loss = 3.6921e-02, PNorm = 112.8322, GNorm = 0.6960, lr_0 = 7.3509e-04
Loss = 4.2476e-02, PNorm = 112.9015, GNorm = 0.3973, lr_0 = 7.3458e-04
Loss = 4.4799e-02, PNorm = 112.9653, GNorm = 0.4878, lr_0 = 7.3408e-04
Loss = 4.0808e-02, PNorm = 113.0413, GNorm = 0.3683, lr_0 = 7.3358e-04
Loss = 4.1751e-02, PNorm = 113.1121, GNorm = 0.3842, lr_0 = 7.3308e-04
Loss = 4.1253e-02, PNorm = 113.1851, GNorm = 0.4501, lr_0 = 7.3257e-04
Loss = 4.1500e-02, PNorm = 113.2556, GNorm = 0.2710, lr_0 = 7.3207e-04
Loss = 3.4490e-02, PNorm = 113.3222, GNorm = 0.5116, lr_0 = 7.3157e-04
Loss = 3.7549e-02, PNorm = 113.3867, GNorm = 0.6987, lr_0 = 7.3107e-04
Loss = 4.4466e-02, PNorm = 113.4512, GNorm = 0.5775, lr_0 = 7.3057e-04
Loss = 4.1883e-02, PNorm = 113.5234, GNorm = 0.2864, lr_0 = 7.3007e-04
Loss = 3.3069e-02, PNorm = 113.5986, GNorm = 0.2759, lr_0 = 7.2957e-04
Loss = 3.8739e-02, PNorm = 113.6654, GNorm = 0.3504, lr_0 = 7.2907e-04
Loss = 4.1194e-02, PNorm = 113.7278, GNorm = 0.8766, lr_0 = 7.2857e-04
Loss = 4.1090e-02, PNorm = 113.8032, GNorm = 0.3236, lr_0 = 7.2807e-04
Loss = 3.7417e-02, PNorm = 113.8708, GNorm = 0.4933, lr_0 = 7.2757e-04
Loss = 4.1449e-02, PNorm = 113.9434, GNorm = 0.5946, lr_0 = 7.2707e-04
Loss = 3.7482e-02, PNorm = 114.0063, GNorm = 0.7353, lr_0 = 7.2657e-04
Loss = 4.1472e-02, PNorm = 114.0776, GNorm = 0.4939, lr_0 = 7.2608e-04
Loss = 3.9486e-02, PNorm = 114.1521, GNorm = 0.4769, lr_0 = 7.2558e-04
Loss = 3.6804e-02, PNorm = 114.2239, GNorm = 0.4939, lr_0 = 7.2508e-04
Loss = 3.7815e-02, PNorm = 114.2819, GNorm = 0.4483, lr_0 = 7.2458e-04
Loss = 3.8697e-02, PNorm = 114.3505, GNorm = 0.3207, lr_0 = 7.2409e-04
Loss = 4.0711e-02, PNorm = 114.4214, GNorm = 0.4235, lr_0 = 7.2359e-04
Loss = 4.2605e-02, PNorm = 114.4940, GNorm = 0.7740, lr_0 = 7.2310e-04
Loss = 3.7275e-02, PNorm = 114.5677, GNorm = 0.4412, lr_0 = 7.2260e-04
Loss = 3.8722e-02, PNorm = 114.6384, GNorm = 0.4965, lr_0 = 7.2211e-04
Loss = 4.1120e-02, PNorm = 114.7073, GNorm = 0.6072, lr_0 = 7.2161e-04
Loss = 4.2267e-02, PNorm = 114.7705, GNorm = 0.6827, lr_0 = 7.2112e-04
Loss = 3.8455e-02, PNorm = 114.8480, GNorm = 0.2385, lr_0 = 7.2062e-04
Loss = 4.5157e-02, PNorm = 114.9134, GNorm = 0.3458, lr_0 = 7.2013e-04
Loss = 4.5369e-02, PNorm = 114.9823, GNorm = 0.3674, lr_0 = 7.1964e-04
Validation mae = 0.494021
Epoch 6
Loss = 3.0976e-02, PNorm = 115.0488, GNorm = 0.4208, lr_0 = 7.1914e-04
Loss = 3.1810e-02, PNorm = 115.1090, GNorm = 0.2786, lr_0 = 7.1865e-04
Loss = 2.8259e-02, PNorm = 115.1557, GNorm = 0.4303, lr_0 = 7.1816e-04
Loss = 3.1771e-02, PNorm = 115.2061, GNorm = 0.3099, lr_0 = 7.1767e-04
Loss = 2.8048e-02, PNorm = 115.2464, GNorm = 0.1712, lr_0 = 7.1717e-04
Loss = 3.4556e-02, PNorm = 115.2905, GNorm = 0.5331, lr_0 = 7.1668e-04
Loss = 2.8792e-02, PNorm = 115.3424, GNorm = 0.2435, lr_0 = 7.1619e-04
Loss = 2.7427e-02, PNorm = 115.3912, GNorm = 0.2061, lr_0 = 7.1570e-04
Loss = 2.7564e-02, PNorm = 115.4374, GNorm = 0.1951, lr_0 = 7.1521e-04
Loss = 2.7572e-02, PNorm = 115.4804, GNorm = 0.5741, lr_0 = 7.1472e-04
Loss = 3.0261e-02, PNorm = 115.5314, GNorm = 0.3101, lr_0 = 7.1423e-04
Loss = 3.0932e-02, PNorm = 115.5842, GNorm = 0.3432, lr_0 = 7.1374e-04
Loss = 3.0287e-02, PNorm = 115.6350, GNorm = 0.3184, lr_0 = 7.1325e-04
Loss = 3.0307e-02, PNorm = 115.6879, GNorm = 0.3248, lr_0 = 7.1277e-04
Loss = 2.6327e-02, PNorm = 115.7371, GNorm = 0.7134, lr_0 = 7.1228e-04
Loss = 3.0885e-02, PNorm = 115.7825, GNorm = 0.6185, lr_0 = 7.1179e-04
Loss = 2.3990e-02, PNorm = 115.8244, GNorm = 0.3130, lr_0 = 7.1130e-04
Loss = 3.0411e-02, PNorm = 115.8745, GNorm = 0.9540, lr_0 = 7.1081e-04
Loss = 2.4314e-02, PNorm = 115.9229, GNorm = 0.1815, lr_0 = 7.1033e-04
Loss = 2.6165e-02, PNorm = 115.9637, GNorm = 0.2430, lr_0 = 7.0984e-04
Loss = 2.8807e-02, PNorm = 116.0116, GNorm = 0.5697, lr_0 = 7.0935e-04
Loss = 2.5642e-02, PNorm = 116.0599, GNorm = 0.4283, lr_0 = 7.0887e-04
Loss = 2.6280e-02, PNorm = 116.1081, GNorm = 0.2572, lr_0 = 7.0838e-04
Loss = 2.6840e-02, PNorm = 116.1556, GNorm = 0.4786, lr_0 = 7.0790e-04
Loss = 2.3544e-02, PNorm = 116.1989, GNorm = 0.5620, lr_0 = 7.0741e-04
Loss = 2.6909e-02, PNorm = 116.2433, GNorm = 0.2618, lr_0 = 7.0693e-04
Loss = 3.0330e-02, PNorm = 116.2862, GNorm = 0.1744, lr_0 = 7.0644e-04
Loss = 2.7934e-02, PNorm = 116.3320, GNorm = 0.8416, lr_0 = 7.0596e-04
Loss = 2.8133e-02, PNorm = 116.3747, GNorm = 0.4702, lr_0 = 7.0548e-04
Loss = 2.9952e-02, PNorm = 116.4242, GNorm = 0.5940, lr_0 = 7.0499e-04
Loss = 3.3258e-02, PNorm = 116.4777, GNorm = 0.2760, lr_0 = 7.0451e-04
Loss = 2.4543e-02, PNorm = 116.5291, GNorm = 0.2728, lr_0 = 7.0403e-04
Loss = 3.1641e-02, PNorm = 116.5786, GNorm = 0.8635, lr_0 = 7.0354e-04
Loss = 2.6725e-02, PNorm = 116.6256, GNorm = 0.7955, lr_0 = 7.0306e-04
Loss = 2.7695e-02, PNorm = 116.6775, GNorm = 0.3248, lr_0 = 7.0258e-04
Loss = 2.6937e-02, PNorm = 116.7268, GNorm = 0.4144, lr_0 = 7.0210e-04
Loss = 2.5076e-02, PNorm = 116.7733, GNorm = 0.5479, lr_0 = 7.0162e-04
Loss = 2.8253e-02, PNorm = 116.8209, GNorm = 0.5286, lr_0 = 7.0114e-04
Loss = 2.8910e-02, PNorm = 116.8776, GNorm = 0.2994, lr_0 = 7.0066e-04
Loss = 2.6511e-02, PNorm = 116.9201, GNorm = 0.3047, lr_0 = 7.0018e-04
Loss = 3.6304e-02, PNorm = 116.9710, GNorm = 0.5214, lr_0 = 6.9970e-04
Loss = 2.6594e-02, PNorm = 117.0258, GNorm = 0.3173, lr_0 = 6.9922e-04
Loss = 2.5706e-02, PNorm = 117.0765, GNorm = 0.8206, lr_0 = 6.9874e-04
Loss = 2.9578e-02, PNorm = 117.1222, GNorm = 0.1952, lr_0 = 6.9826e-04
Loss = 2.4513e-02, PNorm = 117.1670, GNorm = 0.3182, lr_0 = 6.9778e-04
Loss = 2.7603e-02, PNorm = 117.2141, GNorm = 0.4859, lr_0 = 6.9730e-04
Loss = 2.8491e-02, PNorm = 117.2584, GNorm = 0.2357, lr_0 = 6.9683e-04
Loss = 2.4280e-02, PNorm = 117.3134, GNorm = 0.3151, lr_0 = 6.9635e-04
Loss = 2.4424e-02, PNorm = 117.3630, GNorm = 0.5281, lr_0 = 6.9587e-04
Loss = 2.8460e-02, PNorm = 117.4192, GNorm = 0.5628, lr_0 = 6.9540e-04
Loss = 2.5120e-02, PNorm = 117.4631, GNorm = 0.6213, lr_0 = 6.9492e-04
Loss = 2.7360e-02, PNorm = 117.5079, GNorm = 0.3798, lr_0 = 6.9444e-04
Loss = 2.7813e-02, PNorm = 117.5624, GNorm = 0.3777, lr_0 = 6.9397e-04
Loss = 2.6715e-02, PNorm = 117.6169, GNorm = 0.2716, lr_0 = 6.9349e-04
Loss = 2.8972e-02, PNorm = 117.6645, GNorm = 0.4384, lr_0 = 6.9302e-04
Loss = 2.8241e-02, PNorm = 117.7070, GNorm = 0.7928, lr_0 = 6.9254e-04
Loss = 2.5650e-02, PNorm = 117.7595, GNorm = 0.2408, lr_0 = 6.9207e-04
Loss = 2.4653e-02, PNorm = 117.8065, GNorm = 0.5985, lr_0 = 6.9159e-04
Loss = 2.8268e-02, PNorm = 117.8547, GNorm = 0.7532, lr_0 = 6.9112e-04
Loss = 2.8559e-02, PNorm = 117.9010, GNorm = 0.2411, lr_0 = 6.9065e-04
Loss = 2.7553e-02, PNorm = 117.9597, GNorm = 0.5811, lr_0 = 6.9017e-04
Loss = 2.5037e-02, PNorm = 118.0111, GNorm = 0.3557, lr_0 = 6.8970e-04
Loss = 2.4516e-02, PNorm = 118.0581, GNorm = 0.2829, lr_0 = 6.8923e-04
Loss = 2.6318e-02, PNorm = 118.1076, GNorm = 0.2458, lr_0 = 6.8876e-04
Loss = 3.2398e-02, PNorm = 118.1599, GNorm = 0.4499, lr_0 = 6.8828e-04
Loss = 2.2515e-02, PNorm = 118.2107, GNorm = 0.4217, lr_0 = 6.8781e-04
Loss = 2.7948e-02, PNorm = 118.2586, GNorm = 0.7401, lr_0 = 6.8734e-04
Loss = 2.4759e-02, PNorm = 118.3059, GNorm = 0.2699, lr_0 = 6.8687e-04
Loss = 2.7410e-02, PNorm = 118.3584, GNorm = 0.1682, lr_0 = 6.8640e-04
Loss = 2.9313e-02, PNorm = 118.4189, GNorm = 0.7940, lr_0 = 6.8593e-04
Loss = 2.7164e-02, PNorm = 118.4743, GNorm = 0.5962, lr_0 = 6.8546e-04
Loss = 3.8048e-02, PNorm = 118.5355, GNorm = 0.2025, lr_0 = 6.8499e-04
Loss = 2.9861e-02, PNorm = 118.5998, GNorm = 0.6905, lr_0 = 6.8452e-04
Loss = 2.6904e-02, PNorm = 118.6605, GNorm = 0.4048, lr_0 = 6.8405e-04
Loss = 2.9555e-02, PNorm = 118.7154, GNorm = 0.5605, lr_0 = 6.8358e-04
Loss = 2.9156e-02, PNorm = 118.7704, GNorm = 0.4503, lr_0 = 6.8312e-04
Loss = 3.0661e-02, PNorm = 118.8211, GNorm = 0.7674, lr_0 = 6.8265e-04
Loss = 2.8535e-02, PNorm = 118.8805, GNorm = 0.4446, lr_0 = 6.8218e-04
Loss = 2.5823e-02, PNorm = 118.9319, GNorm = 0.3700, lr_0 = 6.8171e-04
Loss = 3.2693e-02, PNorm = 118.9855, GNorm = 0.5314, lr_0 = 6.8125e-04
Loss = 3.0415e-02, PNorm = 119.0434, GNorm = 0.2649, lr_0 = 6.8078e-04
Loss = 3.1280e-02, PNorm = 119.1043, GNorm = 0.2154, lr_0 = 6.8031e-04
Loss = 3.1229e-02, PNorm = 119.1592, GNorm = 0.3295, lr_0 = 6.7985e-04
Loss = 2.9922e-02, PNorm = 119.2207, GNorm = 0.7594, lr_0 = 6.7938e-04
Loss = 3.1079e-02, PNorm = 119.2763, GNorm = 0.2831, lr_0 = 6.7892e-04
Loss = 2.6478e-02, PNorm = 119.3344, GNorm = 0.2113, lr_0 = 6.7845e-04
Loss = 3.1746e-02, PNorm = 119.3866, GNorm = 0.2620, lr_0 = 6.7799e-04
Loss = 3.0234e-02, PNorm = 119.4461, GNorm = 0.3510, lr_0 = 6.7752e-04
Loss = 3.1121e-02, PNorm = 119.5055, GNorm = 0.5233, lr_0 = 6.7706e-04
Loss = 2.9854e-02, PNorm = 119.5676, GNorm = 0.5256, lr_0 = 6.7659e-04
Loss = 2.8577e-02, PNorm = 119.6320, GNorm = 0.3103, lr_0 = 6.7613e-04
Loss = 2.8646e-02, PNorm = 119.6948, GNorm = 0.3175, lr_0 = 6.7567e-04
Loss = 2.8088e-02, PNorm = 119.7532, GNorm = 0.3841, lr_0 = 6.7520e-04
Loss = 3.0265e-02, PNorm = 119.8152, GNorm = 0.3166, lr_0 = 6.7474e-04
Loss = 2.6884e-02, PNorm = 119.8736, GNorm = 0.4829, lr_0 = 6.7428e-04
Loss = 3.1558e-02, PNorm = 119.9275, GNorm = 0.4204, lr_0 = 6.7382e-04
Loss = 2.6592e-02, PNorm = 119.9888, GNorm = 0.3704, lr_0 = 6.7335e-04
Loss = 3.0442e-02, PNorm = 120.0452, GNorm = 0.3132, lr_0 = 6.7289e-04
Loss = 3.2378e-02, PNorm = 120.1043, GNorm = 0.9758, lr_0 = 6.7243e-04
Loss = 3.4141e-02, PNorm = 120.1627, GNorm = 0.4396, lr_0 = 6.7197e-04
Loss = 3.0461e-02, PNorm = 120.2301, GNorm = 0.6625, lr_0 = 6.7151e-04
Loss = 2.9623e-02, PNorm = 120.2995, GNorm = 0.3464, lr_0 = 6.7105e-04
Loss = 3.2465e-02, PNorm = 120.3683, GNorm = 0.4754, lr_0 = 6.7059e-04
Loss = 3.0074e-02, PNorm = 120.4358, GNorm = 0.8038, lr_0 = 6.7013e-04
Loss = 2.9052e-02, PNorm = 120.4923, GNorm = 0.4448, lr_0 = 6.6967e-04
Loss = 3.1965e-02, PNorm = 120.5629, GNorm = 0.4641, lr_0 = 6.6921e-04
Loss = 2.9520e-02, PNorm = 120.6228, GNorm = 0.5954, lr_0 = 6.6876e-04
Loss = 3.6921e-02, PNorm = 120.6951, GNorm = 0.6173, lr_0 = 6.6830e-04
Loss = 3.1269e-02, PNorm = 120.7543, GNorm = 0.4019, lr_0 = 6.6784e-04
Loss = 2.8926e-02, PNorm = 120.8172, GNorm = 0.2673, lr_0 = 6.6738e-04
Loss = 3.3166e-02, PNorm = 120.8813, GNorm = 0.8675, lr_0 = 6.6693e-04
Loss = 2.7401e-02, PNorm = 120.9391, GNorm = 0.2253, lr_0 = 6.6647e-04
Loss = 3.1172e-02, PNorm = 120.9911, GNorm = 0.2606, lr_0 = 6.6601e-04
Loss = 3.2546e-02, PNorm = 121.0474, GNorm = 0.6826, lr_0 = 6.6556e-04
Loss = 2.8025e-02, PNorm = 121.1088, GNorm = 1.0142, lr_0 = 6.6510e-04
Loss = 3.0258e-02, PNorm = 121.1675, GNorm = 0.1969, lr_0 = 6.6464e-04
Loss = 2.9824e-02, PNorm = 121.2241, GNorm = 0.2999, lr_0 = 6.6419e-04
Loss = 3.3413e-02, PNorm = 121.2800, GNorm = 0.4401, lr_0 = 6.6373e-04
Loss = 3.5965e-02, PNorm = 121.3406, GNorm = 0.6061, lr_0 = 6.6328e-04
Loss = 3.2352e-02, PNorm = 121.4054, GNorm = 0.2374, lr_0 = 6.6282e-04
Validation mae = 0.490329
Epoch 7
Loss = 2.9033e-02, PNorm = 121.4637, GNorm = 0.6854, lr_0 = 6.6237e-04
Loss = 2.6853e-02, PNorm = 121.5087, GNorm = 0.5653, lr_0 = 6.6192e-04
Loss = 2.8046e-02, PNorm = 121.5469, GNorm = 0.3405, lr_0 = 6.6146e-04
Loss = 2.3805e-02, PNorm = 121.5903, GNorm = 0.3369, lr_0 = 6.6101e-04
Loss = 2.4368e-02, PNorm = 121.6334, GNorm = 0.2808, lr_0 = 6.6056e-04
Loss = 2.4989e-02, PNorm = 121.6738, GNorm = 0.4087, lr_0 = 6.6011e-04
Loss = 2.4581e-02, PNorm = 121.7079, GNorm = 0.4342, lr_0 = 6.5965e-04
Loss = 2.3619e-02, PNorm = 121.7468, GNorm = 0.1470, lr_0 = 6.5920e-04
Loss = 2.4834e-02, PNorm = 121.7783, GNorm = 0.4857, lr_0 = 6.5875e-04
Loss = 2.3093e-02, PNorm = 121.8187, GNorm = 0.5114, lr_0 = 6.5830e-04
Loss = 2.4336e-02, PNorm = 121.8614, GNorm = 0.5069, lr_0 = 6.5785e-04
Loss = 2.1734e-02, PNorm = 121.9005, GNorm = 0.5059, lr_0 = 6.5740e-04
Loss = 2.5038e-02, PNorm = 121.9419, GNorm = 0.1713, lr_0 = 6.5695e-04
Loss = 2.3882e-02, PNorm = 121.9836, GNorm = 0.2633, lr_0 = 6.5650e-04
Loss = 2.3743e-02, PNorm = 122.0209, GNorm = 0.3664, lr_0 = 6.5605e-04
Loss = 2.1753e-02, PNorm = 122.0580, GNorm = 0.2504, lr_0 = 6.5560e-04
Loss = 2.3473e-02, PNorm = 122.0952, GNorm = 0.2242, lr_0 = 6.5515e-04
Loss = 2.4563e-02, PNorm = 122.1353, GNorm = 0.5810, lr_0 = 6.5470e-04
Loss = 1.9366e-02, PNorm = 122.1840, GNorm = 0.2088, lr_0 = 6.5425e-04
Loss = 2.3379e-02, PNorm = 122.2258, GNorm = 0.3141, lr_0 = 6.5380e-04
Loss = 2.0835e-02, PNorm = 122.2653, GNorm = 0.2667, lr_0 = 6.5335e-04
Loss = 2.1363e-02, PNorm = 122.3068, GNorm = 0.2205, lr_0 = 6.5291e-04
Loss = 2.1307e-02, PNorm = 122.3434, GNorm = 0.5235, lr_0 = 6.5246e-04
Loss = 2.5858e-02, PNorm = 122.3820, GNorm = 0.4045, lr_0 = 6.5201e-04
Loss = 2.3187e-02, PNorm = 122.4243, GNorm = 0.3831, lr_0 = 6.5157e-04
Loss = 2.9692e-02, PNorm = 122.4630, GNorm = 0.5159, lr_0 = 6.5112e-04
Loss = 2.0730e-02, PNorm = 122.5040, GNorm = 0.3660, lr_0 = 6.5067e-04
Loss = 1.8351e-02, PNorm = 122.5472, GNorm = 0.2086, lr_0 = 6.5023e-04
Loss = 2.2090e-02, PNorm = 122.5842, GNorm = 0.1578, lr_0 = 6.4978e-04
Loss = 2.4892e-02, PNorm = 122.6295, GNorm = 0.4534, lr_0 = 6.4934e-04
Loss = 2.2808e-02, PNorm = 122.6695, GNorm = 0.5032, lr_0 = 6.4889e-04
Loss = 2.3635e-02, PNorm = 122.7109, GNorm = 0.2682, lr_0 = 6.4845e-04
Loss = 2.3471e-02, PNorm = 122.7574, GNorm = 0.3244, lr_0 = 6.4800e-04
Loss = 2.3643e-02, PNorm = 122.7968, GNorm = 0.3577, lr_0 = 6.4756e-04
Loss = 1.9447e-02, PNorm = 122.8342, GNorm = 0.1699, lr_0 = 6.4712e-04
Loss = 2.6089e-02, PNorm = 122.8776, GNorm = 0.8460, lr_0 = 6.4667e-04
Loss = 2.5529e-02, PNorm = 122.9281, GNorm = 0.2468, lr_0 = 6.4623e-04
Loss = 2.1906e-02, PNorm = 122.9789, GNorm = 0.3313, lr_0 = 6.4579e-04
Loss = 2.2603e-02, PNorm = 123.0214, GNorm = 0.2940, lr_0 = 6.4534e-04
Loss = 2.5678e-02, PNorm = 123.0677, GNorm = 0.2572, lr_0 = 6.4490e-04
Loss = 2.7487e-02, PNorm = 123.1191, GNorm = 0.2427, lr_0 = 6.4446e-04
Loss = 2.2735e-02, PNorm = 123.1631, GNorm = 0.2923, lr_0 = 6.4402e-04
Loss = 2.3505e-02, PNorm = 123.2062, GNorm = 0.4375, lr_0 = 6.4358e-04
Loss = 2.3904e-02, PNorm = 123.2448, GNorm = 0.1493, lr_0 = 6.4314e-04
Loss = 2.3289e-02, PNorm = 123.2885, GNorm = 0.2149, lr_0 = 6.4270e-04
Loss = 2.0152e-02, PNorm = 123.3286, GNorm = 0.2914, lr_0 = 6.4226e-04
Loss = 2.4320e-02, PNorm = 123.3706, GNorm = 0.2050, lr_0 = 6.4182e-04
Loss = 2.3802e-02, PNorm = 123.4212, GNorm = 0.4621, lr_0 = 6.4138e-04
Loss = 2.4363e-02, PNorm = 123.4653, GNorm = 0.4600, lr_0 = 6.4094e-04
Loss = 2.1269e-02, PNorm = 123.5045, GNorm = 0.2910, lr_0 = 6.4050e-04
Loss = 2.1251e-02, PNorm = 123.5461, GNorm = 0.2518, lr_0 = 6.4006e-04
Loss = 2.2279e-02, PNorm = 123.5907, GNorm = 0.2042, lr_0 = 6.3962e-04
Loss = 2.5633e-02, PNorm = 123.6349, GNorm = 0.5920, lr_0 = 6.3918e-04
Loss = 2.0894e-02, PNorm = 123.6872, GNorm = 0.2149, lr_0 = 6.3874e-04
Loss = 1.9784e-02, PNorm = 123.7346, GNorm = 0.3056, lr_0 = 6.3831e-04
Loss = 2.2469e-02, PNorm = 123.7724, GNorm = 0.3258, lr_0 = 6.3787e-04
Loss = 2.3896e-02, PNorm = 123.8098, GNorm = 0.4412, lr_0 = 6.3743e-04
Loss = 2.2725e-02, PNorm = 123.8524, GNorm = 0.2592, lr_0 = 6.3700e-04
Loss = 2.7085e-02, PNorm = 123.8959, GNorm = 0.4001, lr_0 = 6.3656e-04
Loss = 2.2594e-02, PNorm = 123.9395, GNorm = 0.2467, lr_0 = 6.3612e-04
Loss = 2.2148e-02, PNorm = 123.9901, GNorm = 0.4774, lr_0 = 6.3569e-04
Loss = 2.1928e-02, PNorm = 124.0401, GNorm = 0.4324, lr_0 = 6.3525e-04
Loss = 2.0925e-02, PNorm = 124.0902, GNorm = 0.4867, lr_0 = 6.3482e-04
Loss = 2.4047e-02, PNorm = 124.1367, GNorm = 0.2668, lr_0 = 6.3438e-04
Loss = 2.2147e-02, PNorm = 124.1825, GNorm = 0.2587, lr_0 = 6.3395e-04
Loss = 1.9577e-02, PNorm = 124.2295, GNorm = 0.2464, lr_0 = 6.3351e-04
Loss = 2.2953e-02, PNorm = 124.2760, GNorm = 0.1484, lr_0 = 6.3308e-04
Loss = 1.8273e-02, PNorm = 124.3235, GNorm = 0.3367, lr_0 = 6.3265e-04
Loss = 2.6102e-02, PNorm = 124.3655, GNorm = 0.5623, lr_0 = 6.3221e-04
Loss = 2.2030e-02, PNorm = 124.4126, GNorm = 0.2644, lr_0 = 6.3178e-04
Loss = 2.3282e-02, PNorm = 124.4617, GNorm = 0.1931, lr_0 = 6.3135e-04
Loss = 2.0737e-02, PNorm = 124.5077, GNorm = 0.2685, lr_0 = 6.3091e-04
Loss = 1.8798e-02, PNorm = 124.5499, GNorm = 0.2742, lr_0 = 6.3048e-04
Loss = 2.3560e-02, PNorm = 124.5893, GNorm = 0.2068, lr_0 = 6.3005e-04
Loss = 2.4308e-02, PNorm = 124.6337, GNorm = 0.3168, lr_0 = 6.2962e-04
Loss = 2.2845e-02, PNorm = 124.6818, GNorm = 0.4441, lr_0 = 6.2919e-04
Loss = 2.2177e-02, PNorm = 124.7277, GNorm = 0.6420, lr_0 = 6.2876e-04
Loss = 2.9234e-02, PNorm = 124.7745, GNorm = 0.2823, lr_0 = 6.2833e-04
Loss = 2.4709e-02, PNorm = 124.8245, GNorm = 0.5506, lr_0 = 6.2789e-04
Loss = 2.3109e-02, PNorm = 124.8721, GNorm = 0.1587, lr_0 = 6.2746e-04
Loss = 2.3601e-02, PNorm = 124.9230, GNorm = 0.2778, lr_0 = 6.2703e-04
Loss = 2.4426e-02, PNorm = 124.9753, GNorm = 0.3447, lr_0 = 6.2661e-04
Loss = 2.5204e-02, PNorm = 125.0198, GNorm = 0.3193, lr_0 = 6.2618e-04
Loss = 2.1538e-02, PNorm = 125.0682, GNorm = 0.1748, lr_0 = 6.2575e-04
Loss = 2.3646e-02, PNorm = 125.1170, GNorm = 0.3266, lr_0 = 6.2532e-04
Loss = 2.4400e-02, PNorm = 125.1717, GNorm = 0.3819, lr_0 = 6.2489e-04
Loss = 2.6061e-02, PNorm = 125.2254, GNorm = 0.1777, lr_0 = 6.2446e-04
Loss = 2.3829e-02, PNorm = 125.2758, GNorm = 0.3278, lr_0 = 6.2403e-04
Loss = 2.6917e-02, PNorm = 125.3221, GNorm = 0.2601, lr_0 = 6.2361e-04
Loss = 2.4542e-02, PNorm = 125.3726, GNorm = 0.6388, lr_0 = 6.2318e-04
Loss = 2.5135e-02, PNorm = 125.4300, GNorm = 0.4809, lr_0 = 6.2275e-04
Loss = 2.3844e-02, PNorm = 125.4828, GNorm = 0.1847, lr_0 = 6.2233e-04
Loss = 2.2849e-02, PNorm = 125.5366, GNorm = 0.4930, lr_0 = 6.2190e-04
Loss = 2.9850e-02, PNorm = 125.5842, GNorm = 0.5892, lr_0 = 6.2147e-04
Loss = 2.4021e-02, PNorm = 125.6346, GNorm = 0.4914, lr_0 = 6.2105e-04
Loss = 2.1781e-02, PNorm = 125.6909, GNorm = 0.1526, lr_0 = 6.2062e-04
Loss = 2.3120e-02, PNorm = 125.7429, GNorm = 0.3123, lr_0 = 6.2020e-04
Loss = 2.6106e-02, PNorm = 125.7950, GNorm = 0.5580, lr_0 = 6.1977e-04
Loss = 2.3545e-02, PNorm = 125.8478, GNorm = 0.3138, lr_0 = 6.1935e-04
Loss = 2.2500e-02, PNorm = 125.8974, GNorm = 0.4008, lr_0 = 6.1892e-04
Loss = 2.2889e-02, PNorm = 125.9397, GNorm = 0.5997, lr_0 = 6.1850e-04
Loss = 2.4753e-02, PNorm = 125.9879, GNorm = 0.5528, lr_0 = 6.1808e-04
Loss = 2.6215e-02, PNorm = 126.0357, GNorm = 1.0795, lr_0 = 6.1765e-04
Loss = 2.5139e-02, PNorm = 126.0857, GNorm = 0.5851, lr_0 = 6.1723e-04
Loss = 2.2969e-02, PNorm = 126.1304, GNorm = 0.5511, lr_0 = 6.1681e-04
Loss = 2.2061e-02, PNorm = 126.1826, GNorm = 0.4091, lr_0 = 6.1638e-04
Loss = 2.6528e-02, PNorm = 126.2303, GNorm = 0.3811, lr_0 = 6.1596e-04
Loss = 2.4329e-02, PNorm = 126.2782, GNorm = 0.3040, lr_0 = 6.1554e-04
Loss = 2.3793e-02, PNorm = 126.3355, GNorm = 0.5724, lr_0 = 6.1512e-04
Loss = 2.6676e-02, PNorm = 126.3893, GNorm = 1.0643, lr_0 = 6.1470e-04
Loss = 2.7479e-02, PNorm = 126.4445, GNorm = 0.9308, lr_0 = 6.1428e-04
Loss = 2.5549e-02, PNorm = 126.5023, GNorm = 0.3651, lr_0 = 6.1385e-04
Loss = 2.2675e-02, PNorm = 126.5533, GNorm = 0.2012, lr_0 = 6.1343e-04
Loss = 2.4459e-02, PNorm = 126.5998, GNorm = 0.5896, lr_0 = 6.1301e-04
Loss = 2.7476e-02, PNorm = 126.6430, GNorm = 0.6884, lr_0 = 6.1259e-04
Loss = 2.6317e-02, PNorm = 126.6985, GNorm = 0.3800, lr_0 = 6.1217e-04
Loss = 2.3915e-02, PNorm = 126.7519, GNorm = 0.3344, lr_0 = 6.1175e-04
Loss = 2.1776e-02, PNorm = 126.8029, GNorm = 0.2257, lr_0 = 6.1134e-04
Loss = 2.2502e-02, PNorm = 126.8541, GNorm = 0.2708, lr_0 = 6.1092e-04
Loss = 2.6002e-02, PNorm = 126.9063, GNorm = 0.3399, lr_0 = 6.1050e-04
Validation mae = 0.489102
Epoch 8
Loss = 1.7725e-02, PNorm = 126.9498, GNorm = 0.3011, lr_0 = 6.1008e-04
Loss = 2.1857e-02, PNorm = 126.9743, GNorm = 0.3928, lr_0 = 6.0966e-04
Loss = 1.7050e-02, PNorm = 127.0023, GNorm = 0.2098, lr_0 = 6.0924e-04
Loss = 2.1267e-02, PNorm = 127.0328, GNorm = 0.1806, lr_0 = 6.0883e-04
Loss = 1.9248e-02, PNorm = 127.0697, GNorm = 0.2539, lr_0 = 6.0841e-04
Loss = 1.9068e-02, PNorm = 127.1042, GNorm = 0.2833, lr_0 = 6.0799e-04
Loss = 1.8069e-02, PNorm = 127.1374, GNorm = 0.5406, lr_0 = 6.0758e-04
Loss = 2.0149e-02, PNorm = 127.1612, GNorm = 0.5136, lr_0 = 6.0716e-04
Loss = 1.8558e-02, PNorm = 127.1939, GNorm = 0.2703, lr_0 = 6.0674e-04
Loss = 2.0641e-02, PNorm = 127.2329, GNorm = 0.5085, lr_0 = 6.0633e-04
Loss = 2.0449e-02, PNorm = 127.2707, GNorm = 0.3393, lr_0 = 6.0591e-04
Loss = 1.8875e-02, PNorm = 127.3063, GNorm = 0.2250, lr_0 = 6.0550e-04
Loss = 2.0236e-02, PNorm = 127.3385, GNorm = 0.5705, lr_0 = 6.0508e-04
Loss = 1.8823e-02, PNorm = 127.3797, GNorm = 0.4510, lr_0 = 6.0467e-04
Loss = 2.2696e-02, PNorm = 127.4135, GNorm = 0.2131, lr_0 = 6.0425e-04
Loss = 1.6527e-02, PNorm = 127.4414, GNorm = 0.1749, lr_0 = 6.0384e-04
Loss = 2.0818e-02, PNorm = 127.4749, GNorm = 0.5156, lr_0 = 6.0343e-04
Loss = 2.2031e-02, PNorm = 127.5049, GNorm = 0.3425, lr_0 = 6.0301e-04
Loss = 1.9480e-02, PNorm = 127.5366, GNorm = 0.4219, lr_0 = 6.0260e-04
Loss = 1.9323e-02, PNorm = 127.5690, GNorm = 0.5172, lr_0 = 6.0219e-04
Loss = 1.8813e-02, PNorm = 127.5980, GNorm = 0.1963, lr_0 = 6.0178e-04
Loss = 1.8823e-02, PNorm = 127.6325, GNorm = 0.3157, lr_0 = 6.0136e-04
Loss = 1.6801e-02, PNorm = 127.6656, GNorm = 0.2187, lr_0 = 6.0095e-04
Loss = 1.6664e-02, PNorm = 127.6985, GNorm = 0.1748, lr_0 = 6.0054e-04
Loss = 2.3957e-02, PNorm = 127.7312, GNorm = 0.5884, lr_0 = 6.0013e-04
Loss = 1.7645e-02, PNorm = 127.7611, GNorm = 0.4418, lr_0 = 5.9972e-04
Loss = 1.5253e-02, PNorm = 127.7973, GNorm = 0.4760, lr_0 = 5.9931e-04
Loss = 1.5666e-02, PNorm = 127.8363, GNorm = 0.5827, lr_0 = 5.9890e-04
Loss = 1.6404e-02, PNorm = 127.8725, GNorm = 0.4256, lr_0 = 5.9849e-04
Loss = 1.9316e-02, PNorm = 127.9092, GNorm = 0.5520, lr_0 = 5.9808e-04
Loss = 1.5979e-02, PNorm = 127.9442, GNorm = 0.4435, lr_0 = 5.9767e-04
Loss = 1.5649e-02, PNorm = 127.9744, GNorm = 0.4011, lr_0 = 5.9726e-04
Loss = 1.7188e-02, PNorm = 128.0056, GNorm = 0.2129, lr_0 = 5.9685e-04
Loss = 1.8610e-02, PNorm = 128.0419, GNorm = 0.4936, lr_0 = 5.9644e-04
Loss = 1.4516e-02, PNorm = 128.0775, GNorm = 0.4664, lr_0 = 5.9603e-04
Loss = 1.8227e-02, PNorm = 128.1093, GNorm = 0.1340, lr_0 = 5.9562e-04
Loss = 2.0351e-02, PNorm = 128.1430, GNorm = 0.3729, lr_0 = 5.9521e-04
Loss = 1.5781e-02, PNorm = 128.1837, GNorm = 0.5724, lr_0 = 5.9481e-04
Loss = 1.6768e-02, PNorm = 128.2211, GNorm = 0.1926, lr_0 = 5.9440e-04
Loss = 1.7066e-02, PNorm = 128.2614, GNorm = 0.1816, lr_0 = 5.9399e-04
Loss = 1.8821e-02, PNorm = 128.2955, GNorm = 0.5744, lr_0 = 5.9358e-04
Loss = 1.8413e-02, PNorm = 128.3258, GNorm = 0.3611, lr_0 = 5.9318e-04
Loss = 1.6457e-02, PNorm = 128.3600, GNorm = 0.2449, lr_0 = 5.9277e-04
Loss = 1.9549e-02, PNorm = 128.3984, GNorm = 0.2477, lr_0 = 5.9236e-04
Loss = 1.7206e-02, PNorm = 128.4327, GNorm = 0.7233, lr_0 = 5.9196e-04
Loss = 1.7152e-02, PNorm = 128.4717, GNorm = 0.1836, lr_0 = 5.9155e-04
Loss = 1.5408e-02, PNorm = 128.5073, GNorm = 0.1352, lr_0 = 5.9115e-04
Loss = 1.9322e-02, PNorm = 128.5427, GNorm = 0.3485, lr_0 = 5.9074e-04
Loss = 2.1456e-02, PNorm = 128.5759, GNorm = 0.2024, lr_0 = 5.9034e-04
Loss = 1.9611e-02, PNorm = 128.6166, GNorm = 0.4504, lr_0 = 5.8993e-04
Loss = 1.6518e-02, PNorm = 128.6556, GNorm = 0.3554, lr_0 = 5.8953e-04
Loss = 1.7414e-02, PNorm = 128.6955, GNorm = 0.3505, lr_0 = 5.8913e-04
Loss = 1.7518e-02, PNorm = 128.7273, GNorm = 0.3325, lr_0 = 5.8872e-04
Loss = 1.7743e-02, PNorm = 128.7665, GNorm = 0.2021, lr_0 = 5.8832e-04
Loss = 1.9844e-02, PNorm = 128.8035, GNorm = 0.3710, lr_0 = 5.8792e-04
Loss = 1.8151e-02, PNorm = 128.8424, GNorm = 0.2483, lr_0 = 5.8751e-04
Loss = 1.8934e-02, PNorm = 128.8786, GNorm = 0.4745, lr_0 = 5.8711e-04
Loss = 1.9913e-02, PNorm = 128.9157, GNorm = 0.3516, lr_0 = 5.8671e-04
Loss = 2.0185e-02, PNorm = 128.9548, GNorm = 0.2105, lr_0 = 5.8631e-04
Loss = 1.8198e-02, PNorm = 128.9890, GNorm = 0.2659, lr_0 = 5.8591e-04
Loss = 2.0303e-02, PNorm = 129.0262, GNorm = 0.7757, lr_0 = 5.8550e-04
Loss = 2.0290e-02, PNorm = 129.0715, GNorm = 0.6633, lr_0 = 5.8510e-04
Loss = 2.1465e-02, PNorm = 129.1125, GNorm = 0.3668, lr_0 = 5.8470e-04
Loss = 2.1692e-02, PNorm = 129.1478, GNorm = 0.5081, lr_0 = 5.8430e-04
Loss = 2.0512e-02, PNorm = 129.1882, GNorm = 0.4843, lr_0 = 5.8390e-04
Loss = 1.8115e-02, PNorm = 129.2319, GNorm = 0.2911, lr_0 = 5.8350e-04
Loss = 1.7746e-02, PNorm = 129.2695, GNorm = 0.2990, lr_0 = 5.8310e-04
Loss = 1.7550e-02, PNorm = 129.3074, GNorm = 0.9639, lr_0 = 5.8270e-04
Loss = 1.8905e-02, PNorm = 129.3442, GNorm = 0.2444, lr_0 = 5.8230e-04
Loss = 1.7406e-02, PNorm = 129.3892, GNorm = 0.3182, lr_0 = 5.8190e-04
Loss = 1.6682e-02, PNorm = 129.4297, GNorm = 0.4804, lr_0 = 5.8151e-04
Loss = 1.7712e-02, PNorm = 129.4719, GNorm = 0.4773, lr_0 = 5.8111e-04
Loss = 1.7105e-02, PNorm = 129.5058, GNorm = 0.5205, lr_0 = 5.8071e-04
Loss = 2.0892e-02, PNorm = 129.5500, GNorm = 0.1653, lr_0 = 5.8031e-04
Loss = 1.8217e-02, PNorm = 129.5944, GNorm = 0.1734, lr_0 = 5.7991e-04
Loss = 1.7683e-02, PNorm = 129.6383, GNorm = 0.4480, lr_0 = 5.7952e-04
Loss = 1.8500e-02, PNorm = 129.6768, GNorm = 0.3270, lr_0 = 5.7912e-04
Loss = 2.0163e-02, PNorm = 129.7182, GNorm = 0.3486, lr_0 = 5.7872e-04
Loss = 2.0933e-02, PNorm = 129.7558, GNorm = 0.6236, lr_0 = 5.7833e-04
Loss = 1.7832e-02, PNorm = 129.7962, GNorm = 0.3384, lr_0 = 5.7793e-04
Loss = 1.8863e-02, PNorm = 129.8405, GNorm = 0.2135, lr_0 = 5.7753e-04
Loss = 1.7768e-02, PNorm = 129.8841, GNorm = 0.2070, lr_0 = 5.7714e-04
Loss = 1.8152e-02, PNorm = 129.9220, GNorm = 0.2738, lr_0 = 5.7674e-04
Loss = 1.7691e-02, PNorm = 129.9582, GNorm = 0.2827, lr_0 = 5.7635e-04
Loss = 1.5489e-02, PNorm = 130.0070, GNorm = 0.1764, lr_0 = 5.7595e-04
Loss = 1.8848e-02, PNorm = 130.0535, GNorm = 0.2593, lr_0 = 5.7556e-04
Loss = 1.8370e-02, PNorm = 130.0950, GNorm = 0.4988, lr_0 = 5.7516e-04
Loss = 1.7773e-02, PNorm = 130.1360, GNorm = 0.4794, lr_0 = 5.7477e-04
Loss = 1.8063e-02, PNorm = 130.1800, GNorm = 0.3993, lr_0 = 5.7438e-04
Loss = 1.6910e-02, PNorm = 130.2154, GNorm = 0.2429, lr_0 = 5.7398e-04
Loss = 1.7350e-02, PNorm = 130.2567, GNorm = 0.3374, lr_0 = 5.7359e-04
Loss = 2.0935e-02, PNorm = 130.2944, GNorm = 0.3360, lr_0 = 5.7320e-04
Loss = 1.9235e-02, PNorm = 130.3384, GNorm = 0.6258, lr_0 = 5.7280e-04
Loss = 1.9737e-02, PNorm = 130.3850, GNorm = 0.3635, lr_0 = 5.7241e-04
Loss = 1.6298e-02, PNorm = 130.4274, GNorm = 0.3696, lr_0 = 5.7202e-04
Loss = 2.1672e-02, PNorm = 130.4629, GNorm = 0.6217, lr_0 = 5.7163e-04
Loss = 2.0386e-02, PNorm = 130.5053, GNorm = 0.4727, lr_0 = 5.7124e-04
Loss = 1.8687e-02, PNorm = 130.5464, GNorm = 0.4551, lr_0 = 5.7084e-04
Loss = 1.6867e-02, PNorm = 130.5864, GNorm = 0.4732, lr_0 = 5.7045e-04
Loss = 2.2158e-02, PNorm = 130.6305, GNorm = 0.3257, lr_0 = 5.7006e-04
Loss = 1.8203e-02, PNorm = 130.6751, GNorm = 0.2030, lr_0 = 5.6967e-04
Loss = 2.1630e-02, PNorm = 130.7199, GNorm = 0.1882, lr_0 = 5.6928e-04
Loss = 2.0537e-02, PNorm = 130.7667, GNorm = 0.2281, lr_0 = 5.6889e-04
Loss = 1.8262e-02, PNorm = 130.8089, GNorm = 0.2378, lr_0 = 5.6850e-04
Loss = 1.7680e-02, PNorm = 130.8448, GNorm = 0.3355, lr_0 = 5.6811e-04
Loss = 1.7318e-02, PNorm = 130.8813, GNorm = 0.3070, lr_0 = 5.6772e-04
Loss = 1.8712e-02, PNorm = 130.9174, GNorm = 0.1397, lr_0 = 5.6733e-04
Loss = 1.6508e-02, PNorm = 130.9652, GNorm = 0.1999, lr_0 = 5.6695e-04
Loss = 2.4089e-02, PNorm = 131.0140, GNorm = 0.4752, lr_0 = 5.6656e-04
Loss = 2.2489e-02, PNorm = 131.0666, GNorm = 0.4363, lr_0 = 5.6617e-04
Loss = 1.6095e-02, PNorm = 131.1113, GNorm = 0.1408, lr_0 = 5.6578e-04
Loss = 1.7880e-02, PNorm = 131.1556, GNorm = 0.3426, lr_0 = 5.6539e-04
Loss = 2.1454e-02, PNorm = 131.1962, GNorm = 0.1731, lr_0 = 5.6501e-04
Loss = 1.9572e-02, PNorm = 131.2376, GNorm = 0.1815, lr_0 = 5.6462e-04
Loss = 1.8498e-02, PNorm = 131.2713, GNorm = 0.3665, lr_0 = 5.6423e-04
Loss = 2.1349e-02, PNorm = 131.3106, GNorm = 0.4566, lr_0 = 5.6385e-04
Loss = 1.5340e-02, PNorm = 131.3521, GNorm = 0.1346, lr_0 = 5.6346e-04
Loss = 1.6468e-02, PNorm = 131.3889, GNorm = 0.3894, lr_0 = 5.6307e-04
Loss = 2.0835e-02, PNorm = 131.4164, GNorm = 1.0210, lr_0 = 5.6269e-04
Loss = 1.9904e-02, PNorm = 131.4535, GNorm = 0.5977, lr_0 = 5.6230e-04
Validation mae = 0.485264
Epoch 9
Loss = 1.5790e-02, PNorm = 131.4959, GNorm = 0.4176, lr_0 = 5.6192e-04
Loss = 1.8533e-02, PNorm = 131.5295, GNorm = 0.5746, lr_0 = 5.6153e-04
Loss = 1.3933e-02, PNorm = 131.5624, GNorm = 0.1591, lr_0 = 5.6115e-04
Loss = 1.4810e-02, PNorm = 131.5907, GNorm = 0.2112, lr_0 = 5.6076e-04
Loss = 1.4761e-02, PNorm = 131.6168, GNorm = 0.2321, lr_0 = 5.6038e-04
Loss = 1.5685e-02, PNorm = 131.6433, GNorm = 0.1553, lr_0 = 5.6000e-04
Loss = 1.3600e-02, PNorm = 131.6718, GNorm = 0.2919, lr_0 = 5.5961e-04
Loss = 1.3571e-02, PNorm = 131.7024, GNorm = 0.1947, lr_0 = 5.5923e-04
Loss = 1.3820e-02, PNorm = 131.7316, GNorm = 0.2127, lr_0 = 5.5885e-04
Loss = 1.6806e-02, PNorm = 131.7573, GNorm = 0.2400, lr_0 = 5.5846e-04
Loss = 1.5927e-02, PNorm = 131.7857, GNorm = 0.1556, lr_0 = 5.5808e-04
Loss = 1.8599e-02, PNorm = 131.8162, GNorm = 0.2418, lr_0 = 5.5770e-04
Loss = 1.4366e-02, PNorm = 131.8443, GNorm = 0.2425, lr_0 = 5.5732e-04
Loss = 1.5617e-02, PNorm = 131.8749, GNorm = 0.3267, lr_0 = 5.5693e-04
Loss = 1.3304e-02, PNorm = 131.9070, GNorm = 0.4132, lr_0 = 5.5655e-04
Loss = 1.4943e-02, PNorm = 131.9360, GNorm = 0.2975, lr_0 = 5.5617e-04
Loss = 1.4569e-02, PNorm = 131.9654, GNorm = 0.6211, lr_0 = 5.5579e-04
Loss = 1.2655e-02, PNorm = 131.9938, GNorm = 0.3195, lr_0 = 5.5541e-04
Loss = 1.1764e-02, PNorm = 132.0194, GNorm = 0.1989, lr_0 = 5.5503e-04
Loss = 1.2514e-02, PNorm = 132.0437, GNorm = 0.1377, lr_0 = 5.5465e-04
Loss = 1.6217e-02, PNorm = 132.0668, GNorm = 0.3227, lr_0 = 5.5427e-04
Loss = 1.5609e-02, PNorm = 132.0937, GNorm = 0.3485, lr_0 = 5.5389e-04
Loss = 1.4570e-02, PNorm = 132.1248, GNorm = 0.2228, lr_0 = 5.5351e-04
Loss = 1.6383e-02, PNorm = 132.1630, GNorm = 0.7625, lr_0 = 5.5313e-04
Loss = 1.2874e-02, PNorm = 132.1917, GNorm = 0.3187, lr_0 = 5.5275e-04
Loss = 1.2647e-02, PNorm = 132.2193, GNorm = 0.2675, lr_0 = 5.5237e-04
Loss = 1.9518e-02, PNorm = 132.2479, GNorm = 0.2122, lr_0 = 5.5199e-04
Loss = 1.6909e-02, PNorm = 132.2819, GNorm = 0.4024, lr_0 = 5.5162e-04
Loss = 1.5634e-02, PNorm = 132.3183, GNorm = 0.2417, lr_0 = 5.5124e-04
Loss = 1.4786e-02, PNorm = 132.3473, GNorm = 0.3175, lr_0 = 5.5086e-04
Loss = 1.5348e-02, PNorm = 132.3727, GNorm = 0.3923, lr_0 = 5.5048e-04
Loss = 1.2674e-02, PNorm = 132.4015, GNorm = 0.3729, lr_0 = 5.5011e-04
Loss = 1.4454e-02, PNorm = 132.4320, GNorm = 0.2104, lr_0 = 5.4973e-04
Loss = 1.3190e-02, PNorm = 132.4660, GNorm = 0.3061, lr_0 = 5.4935e-04
Loss = 1.5553e-02, PNorm = 132.4955, GNorm = 0.4680, lr_0 = 5.4898e-04
Loss = 1.3815e-02, PNorm = 132.5227, GNorm = 0.5653, lr_0 = 5.4860e-04
Loss = 1.3051e-02, PNorm = 132.5530, GNorm = 0.3702, lr_0 = 5.4822e-04
Loss = 1.5331e-02, PNorm = 132.5892, GNorm = 0.3015, lr_0 = 5.4785e-04
Loss = 1.7796e-02, PNorm = 132.6198, GNorm = 0.5728, lr_0 = 5.4747e-04
Loss = 1.3827e-02, PNorm = 132.6516, GNorm = 0.2344, lr_0 = 5.4710e-04
Loss = 1.9162e-02, PNorm = 132.6796, GNorm = 0.6018, lr_0 = 5.4672e-04
Loss = 1.4806e-02, PNorm = 132.7159, GNorm = 0.4801, lr_0 = 5.4635e-04
Loss = 1.5889e-02, PNorm = 132.7464, GNorm = 0.3662, lr_0 = 5.4597e-04
Loss = 1.3903e-02, PNorm = 132.7793, GNorm = 0.3632, lr_0 = 5.4560e-04
Loss = 1.5468e-02, PNorm = 132.8090, GNorm = 0.2583, lr_0 = 5.4523e-04
Loss = 1.4756e-02, PNorm = 132.8441, GNorm = 0.6316, lr_0 = 5.4485e-04
Loss = 1.5354e-02, PNorm = 132.8809, GNorm = 0.3978, lr_0 = 5.4448e-04
Loss = 1.8922e-02, PNorm = 132.9125, GNorm = 0.2533, lr_0 = 5.4411e-04
Loss = 1.4446e-02, PNorm = 132.9412, GNorm = 0.1903, lr_0 = 5.4373e-04
Loss = 1.4931e-02, PNorm = 132.9711, GNorm = 0.2356, lr_0 = 5.4336e-04
Loss = 1.2699e-02, PNorm = 133.0043, GNorm = 0.2633, lr_0 = 5.4299e-04
Loss = 1.4227e-02, PNorm = 133.0385, GNorm = 0.6696, lr_0 = 5.4262e-04
Loss = 1.5456e-02, PNorm = 133.0679, GNorm = 0.3766, lr_0 = 5.4225e-04
Loss = 1.3351e-02, PNorm = 133.1035, GNorm = 0.4109, lr_0 = 5.4187e-04
Loss = 1.6661e-02, PNorm = 133.1272, GNorm = 0.6859, lr_0 = 5.4150e-04
Loss = 1.4142e-02, PNorm = 133.1621, GNorm = 0.1810, lr_0 = 5.4113e-04
Loss = 1.5098e-02, PNorm = 133.1961, GNorm = 0.5494, lr_0 = 5.4076e-04
Loss = 2.0981e-02, PNorm = 133.2323, GNorm = 0.5344, lr_0 = 5.4039e-04
Loss = 1.7636e-02, PNorm = 133.2658, GNorm = 0.4481, lr_0 = 5.4002e-04
Loss = 1.5852e-02, PNorm = 133.3031, GNorm = 0.4466, lr_0 = 5.3965e-04
Loss = 1.4135e-02, PNorm = 133.3404, GNorm = 0.5087, lr_0 = 5.3928e-04
Loss = 1.3288e-02, PNorm = 133.3726, GNorm = 0.3199, lr_0 = 5.3891e-04
Loss = 1.9396e-02, PNorm = 133.4001, GNorm = 0.3867, lr_0 = 5.3854e-04
Loss = 1.3792e-02, PNorm = 133.4364, GNorm = 0.1882, lr_0 = 5.3817e-04
Loss = 1.5385e-02, PNorm = 133.4718, GNorm = 0.5832, lr_0 = 5.3781e-04
Loss = 1.3740e-02, PNorm = 133.5145, GNorm = 0.4021, lr_0 = 5.3744e-04
Loss = 1.8269e-02, PNorm = 133.5498, GNorm = 0.4201, lr_0 = 5.3707e-04
Loss = 1.6095e-02, PNorm = 133.5837, GNorm = 0.4053, lr_0 = 5.3670e-04
Loss = 1.3685e-02, PNorm = 133.6184, GNorm = 0.1868, lr_0 = 5.3633e-04
Loss = 1.6618e-02, PNorm = 133.6514, GNorm = 0.5844, lr_0 = 5.3597e-04
Loss = 1.5188e-02, PNorm = 133.6906, GNorm = 0.1403, lr_0 = 5.3560e-04
Loss = 1.8511e-02, PNorm = 133.7349, GNorm = 0.2194, lr_0 = 5.3523e-04
Loss = 1.3116e-02, PNorm = 133.7705, GNorm = 0.4184, lr_0 = 5.3486e-04
Loss = 1.3239e-02, PNorm = 133.7992, GNorm = 0.2892, lr_0 = 5.3450e-04
Loss = 1.4546e-02, PNorm = 133.8350, GNorm = 0.4627, lr_0 = 5.3413e-04
Loss = 1.3084e-02, PNorm = 133.8631, GNorm = 0.1277, lr_0 = 5.3377e-04
Loss = 1.6848e-02, PNorm = 133.8936, GNorm = 0.4141, lr_0 = 5.3340e-04
Loss = 1.5567e-02, PNorm = 133.9246, GNorm = 0.3001, lr_0 = 5.3304e-04
Loss = 1.3017e-02, PNorm = 133.9562, GNorm = 0.4274, lr_0 = 5.3267e-04
Loss = 1.4126e-02, PNorm = 133.9871, GNorm = 0.3053, lr_0 = 5.3231e-04
Loss = 1.4875e-02, PNorm = 134.0222, GNorm = 0.1462, lr_0 = 5.3194e-04
Loss = 1.5405e-02, PNorm = 134.0569, GNorm = 0.2401, lr_0 = 5.3158e-04
Loss = 1.4661e-02, PNorm = 134.0910, GNorm = 0.2476, lr_0 = 5.3121e-04
Loss = 1.4986e-02, PNorm = 134.1220, GNorm = 0.4158, lr_0 = 5.3085e-04
Loss = 1.3850e-02, PNorm = 134.1542, GNorm = 0.2943, lr_0 = 5.3048e-04
Loss = 1.4115e-02, PNorm = 134.1913, GNorm = 0.2015, lr_0 = 5.3012e-04
Loss = 1.3512e-02, PNorm = 134.2258, GNorm = 0.1459, lr_0 = 5.2976e-04
Loss = 1.6525e-02, PNorm = 134.2590, GNorm = 0.4503, lr_0 = 5.2939e-04
Loss = 1.7756e-02, PNorm = 134.2907, GNorm = 0.6223, lr_0 = 5.2903e-04
Loss = 1.5002e-02, PNorm = 134.3236, GNorm = 0.2057, lr_0 = 5.2867e-04
Loss = 1.2621e-02, PNorm = 134.3558, GNorm = 0.3807, lr_0 = 5.2831e-04
Loss = 1.2794e-02, PNorm = 134.3880, GNorm = 0.2190, lr_0 = 5.2795e-04
Loss = 1.3600e-02, PNorm = 134.4186, GNorm = 0.2067, lr_0 = 5.2758e-04
Loss = 1.4959e-02, PNorm = 134.4488, GNorm = 0.4570, lr_0 = 5.2722e-04
Loss = 1.3380e-02, PNorm = 134.4788, GNorm = 0.3202, lr_0 = 5.2686e-04
Loss = 1.7475e-02, PNorm = 134.5148, GNorm = 0.2754, lr_0 = 5.2650e-04
Loss = 1.5086e-02, PNorm = 134.5500, GNorm = 0.2251, lr_0 = 5.2614e-04
Loss = 1.4173e-02, PNorm = 134.5825, GNorm = 0.2783, lr_0 = 5.2578e-04
Loss = 1.3132e-02, PNorm = 134.6133, GNorm = 0.2781, lr_0 = 5.2542e-04
Loss = 1.1491e-02, PNorm = 134.6413, GNorm = 0.2371, lr_0 = 5.2506e-04
Loss = 1.5289e-02, PNorm = 134.6762, GNorm = 0.2017, lr_0 = 5.2470e-04
Loss = 1.5275e-02, PNorm = 134.7095, GNorm = 0.5520, lr_0 = 5.2434e-04
Loss = 1.4137e-02, PNorm = 134.7496, GNorm = 0.3113, lr_0 = 5.2398e-04
Loss = 1.3907e-02, PNorm = 134.7837, GNorm = 0.3704, lr_0 = 5.2362e-04
Loss = 1.4596e-02, PNorm = 134.8158, GNorm = 0.4217, lr_0 = 5.2326e-04
Loss = 2.0595e-02, PNorm = 134.8527, GNorm = 0.2740, lr_0 = 5.2290e-04
Loss = 1.4775e-02, PNorm = 134.8933, GNorm = 0.2514, lr_0 = 5.2255e-04
Loss = 1.3273e-02, PNorm = 134.9339, GNorm = 0.2336, lr_0 = 5.2219e-04
Loss = 1.6186e-02, PNorm = 134.9638, GNorm = 0.2711, lr_0 = 5.2183e-04
Loss = 1.7298e-02, PNorm = 134.9980, GNorm = 0.3627, lr_0 = 5.2147e-04
Loss = 1.4977e-02, PNorm = 135.0372, GNorm = 0.2130, lr_0 = 5.2112e-04
Loss = 1.3546e-02, PNorm = 135.0751, GNorm = 0.2326, lr_0 = 5.2076e-04
Loss = 1.3700e-02, PNorm = 135.1133, GNorm = 0.2599, lr_0 = 5.2040e-04
Loss = 1.4403e-02, PNorm = 135.1447, GNorm = 0.3811, lr_0 = 5.2005e-04
Loss = 1.4938e-02, PNorm = 135.1766, GNorm = 0.5409, lr_0 = 5.1969e-04
Loss = 1.4720e-02, PNorm = 135.2064, GNorm = 0.3674, lr_0 = 5.1933e-04
Loss = 1.4510e-02, PNorm = 135.2341, GNorm = 0.2316, lr_0 = 5.1898e-04
Loss = 1.4786e-02, PNorm = 135.2609, GNorm = 0.2975, lr_0 = 5.1862e-04
Loss = 1.3966e-02, PNorm = 135.2926, GNorm = 0.6122, lr_0 = 5.1827e-04
Loss = 1.7230e-02, PNorm = 135.3305, GNorm = 0.5941, lr_0 = 5.1791e-04
Validation mae = 0.485483
Epoch 10
Loss = 1.3186e-02, PNorm = 135.3600, GNorm = 0.2881, lr_0 = 5.1756e-04
Loss = 1.0674e-02, PNorm = 135.3814, GNorm = 0.3191, lr_0 = 5.1720e-04
Loss = 1.4733e-02, PNorm = 135.4041, GNorm = 0.2541, lr_0 = 5.1685e-04
Loss = 1.4647e-02, PNorm = 135.4284, GNorm = 0.1740, lr_0 = 5.1649e-04
Loss = 1.3112e-02, PNorm = 135.4533, GNorm = 0.1544, lr_0 = 5.1614e-04
Loss = 1.1444e-02, PNorm = 135.4780, GNorm = 0.2460, lr_0 = 5.1579e-04
Loss = 1.0537e-02, PNorm = 135.5009, GNorm = 0.3055, lr_0 = 5.1543e-04
Loss = 1.3814e-02, PNorm = 135.5283, GNorm = 0.5429, lr_0 = 5.1508e-04
Loss = 1.2924e-02, PNorm = 135.5558, GNorm = 0.6539, lr_0 = 5.1473e-04
Loss = 1.1875e-02, PNorm = 135.5851, GNorm = 0.2602, lr_0 = 5.1437e-04
Loss = 1.2598e-02, PNorm = 135.6106, GNorm = 0.4532, lr_0 = 5.1402e-04
Loss = 1.3734e-02, PNorm = 135.6386, GNorm = 0.5412, lr_0 = 5.1367e-04
Loss = 1.2503e-02, PNorm = 135.6643, GNorm = 0.4613, lr_0 = 5.1332e-04
Loss = 1.2227e-02, PNorm = 135.6863, GNorm = 0.2015, lr_0 = 5.1297e-04
Loss = 1.2492e-02, PNorm = 135.7032, GNorm = 0.3014, lr_0 = 5.1262e-04
Loss = 1.1337e-02, PNorm = 135.7237, GNorm = 0.4776, lr_0 = 5.1226e-04
Loss = 1.1714e-02, PNorm = 135.7493, GNorm = 0.2062, lr_0 = 5.1191e-04
Loss = 1.0626e-02, PNorm = 135.7726, GNorm = 0.2913, lr_0 = 5.1156e-04
Loss = 1.2004e-02, PNorm = 135.7966, GNorm = 0.3492, lr_0 = 5.1121e-04
Loss = 1.1241e-02, PNorm = 135.8205, GNorm = 0.1226, lr_0 = 5.1086e-04
Loss = 1.0800e-02, PNorm = 135.8455, GNorm = 0.3388, lr_0 = 5.1051e-04
Loss = 1.3030e-02, PNorm = 135.8710, GNorm = 0.1729, lr_0 = 5.1016e-04
Loss = 1.5095e-02, PNorm = 135.8952, GNorm = 0.3161, lr_0 = 5.0981e-04
Loss = 1.4287e-02, PNorm = 135.9270, GNorm = 0.2436, lr_0 = 5.0946e-04
Loss = 1.1984e-02, PNorm = 135.9591, GNorm = 0.4267, lr_0 = 5.0911e-04
Loss = 1.1557e-02, PNorm = 135.9877, GNorm = 0.1785, lr_0 = 5.0877e-04
Loss = 9.9133e-03, PNorm = 136.0091, GNorm = 0.3052, lr_0 = 5.0842e-04
Loss = 1.1893e-02, PNorm = 136.0225, GNorm = 0.8977, lr_0 = 5.0807e-04
Loss = 1.2369e-02, PNorm = 136.0391, GNorm = 0.1658, lr_0 = 5.0772e-04
Loss = 1.0151e-02, PNorm = 136.0661, GNorm = 0.2808, lr_0 = 5.0737e-04
Loss = 1.1926e-02, PNorm = 136.0925, GNorm = 0.2483, lr_0 = 5.0703e-04
Loss = 1.3011e-02, PNorm = 136.1203, GNorm = 0.2902, lr_0 = 5.0668e-04
Loss = 1.0488e-02, PNorm = 136.1468, GNorm = 0.1834, lr_0 = 5.0633e-04
Loss = 1.0563e-02, PNorm = 136.1720, GNorm = 0.2917, lr_0 = 5.0598e-04
Loss = 1.3113e-02, PNorm = 136.1979, GNorm = 0.3991, lr_0 = 5.0564e-04
Loss = 1.5062e-02, PNorm = 136.2201, GNorm = 0.5963, lr_0 = 5.0529e-04
Loss = 1.2144e-02, PNorm = 136.2492, GNorm = 0.2941, lr_0 = 5.0494e-04
Loss = 1.1916e-02, PNorm = 136.2747, GNorm = 0.3462, lr_0 = 5.0460e-04
Loss = 1.2384e-02, PNorm = 136.3026, GNorm = 0.2844, lr_0 = 5.0425e-04
Loss = 1.2817e-02, PNorm = 136.3314, GNorm = 0.1815, lr_0 = 5.0391e-04
Loss = 1.1579e-02, PNorm = 136.3588, GNorm = 0.1697, lr_0 = 5.0356e-04
Loss = 1.0325e-02, PNorm = 136.3873, GNorm = 0.1813, lr_0 = 5.0322e-04
Loss = 1.1399e-02, PNorm = 136.4155, GNorm = 0.1362, lr_0 = 5.0287e-04
Loss = 1.1299e-02, PNorm = 136.4394, GNorm = 0.2055, lr_0 = 5.0253e-04
Loss = 1.5892e-02, PNorm = 136.4644, GNorm = 0.3817, lr_0 = 5.0218e-04
Loss = 1.3166e-02, PNorm = 136.4939, GNorm = 0.6009, lr_0 = 5.0184e-04
Loss = 1.2283e-02, PNorm = 136.5229, GNorm = 0.2738, lr_0 = 5.0150e-04
Loss = 1.2049e-02, PNorm = 136.5509, GNorm = 0.3744, lr_0 = 5.0115e-04
Loss = 1.1556e-02, PNorm = 136.5787, GNorm = 0.3786, lr_0 = 5.0081e-04
Loss = 1.4602e-02, PNorm = 136.6087, GNorm = 0.1976, lr_0 = 5.0047e-04
Loss = 1.1421e-02, PNorm = 136.6365, GNorm = 0.4770, lr_0 = 5.0012e-04
Loss = 1.1199e-02, PNorm = 136.6653, GNorm = 0.2657, lr_0 = 4.9978e-04
Loss = 1.2098e-02, PNorm = 136.6944, GNorm = 0.3477, lr_0 = 4.9944e-04
Loss = 1.2517e-02, PNorm = 136.7194, GNorm = 0.5598, lr_0 = 4.9910e-04
Loss = 1.3318e-02, PNorm = 136.7459, GNorm = 0.3658, lr_0 = 4.9875e-04
Loss = 1.5465e-02, PNorm = 136.7757, GNorm = 0.3444, lr_0 = 4.9841e-04
Loss = 1.2189e-02, PNorm = 136.8041, GNorm = 0.2430, lr_0 = 4.9807e-04
Loss = 1.2310e-02, PNorm = 136.8358, GNorm = 0.2891, lr_0 = 4.9773e-04
Loss = 9.6512e-03, PNorm = 136.8676, GNorm = 0.4993, lr_0 = 4.9739e-04
Loss = 1.2206e-02, PNorm = 136.8935, GNorm = 0.6057, lr_0 = 4.9705e-04
Loss = 1.4832e-02, PNorm = 136.9252, GNorm = 0.3755, lr_0 = 4.9671e-04
Loss = 1.0240e-02, PNorm = 136.9590, GNorm = 0.3063, lr_0 = 4.9637e-04
Loss = 1.2352e-02, PNorm = 136.9889, GNorm = 0.2563, lr_0 = 4.9603e-04
Loss = 1.4520e-02, PNorm = 137.0200, GNorm = 0.2429, lr_0 = 4.9569e-04
Loss = 1.2708e-02, PNorm = 137.0490, GNorm = 0.3033, lr_0 = 4.9535e-04
Loss = 1.2346e-02, PNorm = 137.0799, GNorm = 0.2481, lr_0 = 4.9501e-04
Loss = 1.1299e-02, PNorm = 137.1054, GNorm = 0.2398, lr_0 = 4.9467e-04
Loss = 1.1919e-02, PNorm = 137.1344, GNorm = 0.2579, lr_0 = 4.9433e-04
Loss = 1.2784e-02, PNorm = 137.1581, GNorm = 0.3759, lr_0 = 4.9399e-04
Loss = 1.1884e-02, PNorm = 137.1855, GNorm = 0.2010, lr_0 = 4.9365e-04
Loss = 1.2249e-02, PNorm = 137.2153, GNorm = 0.3868, lr_0 = 4.9332e-04
Loss = 1.2180e-02, PNorm = 137.2414, GNorm = 0.1936, lr_0 = 4.9298e-04
Loss = 1.4052e-02, PNorm = 137.2752, GNorm = 0.1677, lr_0 = 4.9264e-04
Loss = 1.5284e-02, PNorm = 137.3062, GNorm = 0.4229, lr_0 = 4.9230e-04
Loss = 1.1887e-02, PNorm = 137.3342, GNorm = 0.3547, lr_0 = 4.9197e-04
Loss = 1.1199e-02, PNorm = 137.3655, GNorm = 0.3132, lr_0 = 4.9163e-04
Loss = 1.0700e-02, PNorm = 137.3949, GNorm = 0.1395, lr_0 = 4.9129e-04
Loss = 1.2204e-02, PNorm = 137.4299, GNorm = 0.4240, lr_0 = 4.9095e-04
Loss = 1.2207e-02, PNorm = 137.4615, GNorm = 0.2086, lr_0 = 4.9062e-04
Loss = 1.2985e-02, PNorm = 137.4931, GNorm = 0.1365, lr_0 = 4.9028e-04
Loss = 1.0682e-02, PNorm = 137.5182, GNorm = 0.1452, lr_0 = 4.8995e-04
Loss = 1.3093e-02, PNorm = 137.5420, GNorm = 0.6876, lr_0 = 4.8961e-04
Loss = 1.4737e-02, PNorm = 137.5708, GNorm = 0.2069, lr_0 = 4.8928e-04
Loss = 1.4738e-02, PNorm = 137.6007, GNorm = 0.2902, lr_0 = 4.8894e-04
Loss = 1.2858e-02, PNorm = 137.6373, GNorm = 0.5494, lr_0 = 4.8861e-04
Loss = 1.0377e-02, PNorm = 137.6707, GNorm = 0.4559, lr_0 = 4.8827e-04
Loss = 1.2222e-02, PNorm = 137.6988, GNorm = 0.1890, lr_0 = 4.8794e-04
Loss = 1.3378e-02, PNorm = 137.7239, GNorm = 0.5896, lr_0 = 4.8760e-04
Loss = 1.0731e-02, PNorm = 137.7481, GNorm = 0.1305, lr_0 = 4.8727e-04
Loss = 1.3340e-02, PNorm = 137.7707, GNorm = 0.3114, lr_0 = 4.8693e-04
Loss = 1.1767e-02, PNorm = 137.7996, GNorm = 0.1615, lr_0 = 4.8660e-04
Loss = 1.0997e-02, PNorm = 137.8273, GNorm = 0.2716, lr_0 = 4.8627e-04
Loss = 1.2160e-02, PNorm = 137.8557, GNorm = 0.1899, lr_0 = 4.8593e-04
Loss = 9.9232e-03, PNorm = 137.8895, GNorm = 0.2350, lr_0 = 4.8560e-04
Loss = 1.1343e-02, PNorm = 137.9147, GNorm = 0.4957, lr_0 = 4.8527e-04
Loss = 1.9624e-02, PNorm = 137.9397, GNorm = 0.1465, lr_0 = 4.8494e-04
Loss = 1.1395e-02, PNorm = 137.9683, GNorm = 0.3741, lr_0 = 4.8460e-04
Loss = 1.1749e-02, PNorm = 137.9948, GNorm = 0.3652, lr_0 = 4.8427e-04
Loss = 1.2373e-02, PNorm = 138.0255, GNorm = 0.0993, lr_0 = 4.8394e-04
Loss = 1.2606e-02, PNorm = 138.0614, GNorm = 0.4788, lr_0 = 4.8361e-04
Loss = 1.3805e-02, PNorm = 138.0892, GNorm = 0.2351, lr_0 = 4.8328e-04
Loss = 1.2925e-02, PNorm = 138.1181, GNorm = 0.2442, lr_0 = 4.8295e-04
Loss = 1.3729e-02, PNorm = 138.1447, GNorm = 0.2879, lr_0 = 4.8262e-04
Loss = 1.0065e-02, PNorm = 138.1743, GNorm = 0.2440, lr_0 = 4.8228e-04
Loss = 1.0272e-02, PNorm = 138.2042, GNorm = 0.4429, lr_0 = 4.8195e-04
Loss = 9.8352e-03, PNorm = 138.2343, GNorm = 0.1663, lr_0 = 4.8162e-04
Loss = 1.0378e-02, PNorm = 138.2599, GNorm = 0.3921, lr_0 = 4.8129e-04
Loss = 1.4039e-02, PNorm = 138.2848, GNorm = 0.2849, lr_0 = 4.8096e-04
Loss = 1.1398e-02, PNorm = 138.3082, GNorm = 0.1420, lr_0 = 4.8064e-04
Loss = 1.4775e-02, PNorm = 138.3317, GNorm = 0.6276, lr_0 = 4.8031e-04
Loss = 1.2270e-02, PNorm = 138.3607, GNorm = 0.4483, lr_0 = 4.7998e-04
Loss = 1.2289e-02, PNorm = 138.3912, GNorm = 0.1682, lr_0 = 4.7965e-04
Loss = 1.0772e-02, PNorm = 138.4204, GNorm = 0.1085, lr_0 = 4.7932e-04
Loss = 1.3083e-02, PNorm = 138.4473, GNorm = 0.3092, lr_0 = 4.7899e-04
Loss = 1.1168e-02, PNorm = 138.4762, GNorm = 0.1527, lr_0 = 4.7866e-04
Loss = 1.1438e-02, PNorm = 138.5023, GNorm = 0.1829, lr_0 = 4.7833e-04
Loss = 1.4199e-02, PNorm = 138.5284, GNorm = 0.2637, lr_0 = 4.7801e-04
Loss = 1.1343e-02, PNorm = 138.5587, GNorm = 0.2980, lr_0 = 4.7768e-04
Loss = 1.1594e-02, PNorm = 138.5890, GNorm = 0.2470, lr_0 = 4.7735e-04
Loss = 1.0363e-02, PNorm = 138.6208, GNorm = 0.4143, lr_0 = 4.7703e-04
Validation mae = 0.479760
Epoch 11
Loss = 8.4334e-03, PNorm = 138.6432, GNorm = 0.2436, lr_0 = 4.7670e-04
Loss = 1.1348e-02, PNorm = 138.6603, GNorm = 0.2736, lr_0 = 4.7637e-04
Loss = 8.8230e-03, PNorm = 138.6798, GNorm = 0.4422, lr_0 = 4.7605e-04
Loss = 1.1261e-02, PNorm = 138.6993, GNorm = 0.1260, lr_0 = 4.7572e-04
Loss = 1.2006e-02, PNorm = 138.7194, GNorm = 0.1168, lr_0 = 4.7539e-04
Loss = 1.2050e-02, PNorm = 138.7387, GNorm = 0.2886, lr_0 = 4.7507e-04
Loss = 9.0558e-03, PNorm = 138.7577, GNorm = 0.3986, lr_0 = 4.7474e-04
Loss = 1.3069e-02, PNorm = 138.7743, GNorm = 0.8857, lr_0 = 4.7442e-04
Loss = 9.7917e-03, PNorm = 138.7986, GNorm = 0.1153, lr_0 = 4.7409e-04
Loss = 9.8107e-03, PNorm = 138.8253, GNorm = 0.1605, lr_0 = 4.7377e-04
Loss = 1.1059e-02, PNorm = 138.8451, GNorm = 0.2781, lr_0 = 4.7344e-04
Loss = 9.6809e-03, PNorm = 138.8609, GNorm = 0.3514, lr_0 = 4.7312e-04
Loss = 1.0414e-02, PNorm = 138.8832, GNorm = 0.2184, lr_0 = 4.7279e-04
Loss = 8.8004e-03, PNorm = 138.9067, GNorm = 0.3577, lr_0 = 4.7247e-04
Loss = 1.1770e-02, PNorm = 138.9317, GNorm = 0.7211, lr_0 = 4.7215e-04
Loss = 1.1809e-02, PNorm = 138.9529, GNorm = 0.1947, lr_0 = 4.7182e-04
Loss = 9.7825e-03, PNorm = 138.9738, GNorm = 0.3206, lr_0 = 4.7150e-04
Loss = 9.5625e-03, PNorm = 138.9991, GNorm = 0.3032, lr_0 = 4.7118e-04
Loss = 8.7158e-03, PNorm = 139.0224, GNorm = 0.3059, lr_0 = 4.7085e-04
Loss = 1.0321e-02, PNorm = 139.0439, GNorm = 0.3110, lr_0 = 4.7053e-04
Loss = 9.4185e-03, PNorm = 139.0650, GNorm = 0.2293, lr_0 = 4.7021e-04
Loss = 9.3288e-03, PNorm = 139.0847, GNorm = 0.1724, lr_0 = 4.6989e-04
Loss = 9.9248e-03, PNorm = 139.1093, GNorm = 0.1614, lr_0 = 4.6957e-04
Loss = 1.1092e-02, PNorm = 139.1341, GNorm = 0.2109, lr_0 = 4.6924e-04
Loss = 1.0019e-02, PNorm = 139.1561, GNorm = 0.1413, lr_0 = 4.6892e-04
Loss = 1.1321e-02, PNorm = 139.1806, GNorm = 0.1695, lr_0 = 4.6860e-04
Loss = 9.2115e-03, PNorm = 139.2094, GNorm = 0.2191, lr_0 = 4.6828e-04
Loss = 8.4345e-03, PNorm = 139.2304, GNorm = 0.1561, lr_0 = 4.6796e-04
Loss = 1.0703e-02, PNorm = 139.2489, GNorm = 0.1132, lr_0 = 4.6764e-04
Loss = 1.1650e-02, PNorm = 139.2703, GNorm = 0.3065, lr_0 = 4.6732e-04
Loss = 1.1064e-02, PNorm = 139.2939, GNorm = 0.3890, lr_0 = 4.6700e-04
Loss = 1.0018e-02, PNorm = 139.3211, GNorm = 0.2270, lr_0 = 4.6668e-04
Loss = 9.9278e-03, PNorm = 139.3459, GNorm = 0.1367, lr_0 = 4.6636e-04
Loss = 9.5159e-03, PNorm = 139.3676, GNorm = 0.1385, lr_0 = 4.6604e-04
Loss = 8.7067e-03, PNorm = 139.3926, GNorm = 0.1862, lr_0 = 4.6572e-04
Loss = 9.5082e-03, PNorm = 139.4125, GNorm = 0.2261, lr_0 = 4.6540e-04
Loss = 1.0159e-02, PNorm = 139.4308, GNorm = 0.2267, lr_0 = 4.6508e-04
Loss = 1.1049e-02, PNorm = 139.4493, GNorm = 0.2123, lr_0 = 4.6476e-04
Loss = 8.9375e-03, PNorm = 139.4686, GNorm = 0.0840, lr_0 = 4.6445e-04
Loss = 9.7346e-03, PNorm = 139.4904, GNorm = 0.1623, lr_0 = 4.6413e-04
Loss = 9.5719e-03, PNorm = 139.5126, GNorm = 0.3374, lr_0 = 4.6381e-04
Loss = 1.1438e-02, PNorm = 139.5398, GNorm = 0.2332, lr_0 = 4.6349e-04
Loss = 1.1268e-02, PNorm = 139.5660, GNorm = 0.3347, lr_0 = 4.6317e-04
Loss = 1.0847e-02, PNorm = 139.5889, GNorm = 0.4133, lr_0 = 4.6286e-04
Loss = 9.7220e-03, PNorm = 139.6120, GNorm = 0.1957, lr_0 = 4.6254e-04
Loss = 1.0114e-02, PNorm = 139.6346, GNorm = 0.1454, lr_0 = 4.6222e-04
Loss = 9.1548e-03, PNorm = 139.6579, GNorm = 0.1291, lr_0 = 4.6191e-04
Loss = 9.7533e-03, PNorm = 139.6811, GNorm = 0.2227, lr_0 = 4.6159e-04
Loss = 1.1352e-02, PNorm = 139.6969, GNorm = 0.3504, lr_0 = 4.6127e-04
Loss = 1.0756e-02, PNorm = 139.7202, GNorm = 0.4490, lr_0 = 4.6096e-04
Loss = 1.3847e-02, PNorm = 139.7428, GNorm = 0.5618, lr_0 = 4.6064e-04
Loss = 1.2098e-02, PNorm = 139.7623, GNorm = 0.5003, lr_0 = 4.6033e-04
Loss = 1.1084e-02, PNorm = 139.7891, GNorm = 0.2100, lr_0 = 4.6001e-04
Loss = 1.1950e-02, PNorm = 139.8083, GNorm = 0.1089, lr_0 = 4.5970e-04
Loss = 1.0113e-02, PNorm = 139.8319, GNorm = 0.3842, lr_0 = 4.5938e-04
Loss = 9.7921e-03, PNorm = 139.8537, GNorm = 0.3639, lr_0 = 4.5907e-04
Loss = 9.7700e-03, PNorm = 139.8758, GNorm = 0.3435, lr_0 = 4.5875e-04
Loss = 9.6635e-03, PNorm = 139.8984, GNorm = 0.2746, lr_0 = 4.5844e-04
Loss = 1.0106e-02, PNorm = 139.9208, GNorm = 0.1408, lr_0 = 4.5812e-04
Loss = 8.2960e-03, PNorm = 139.9436, GNorm = 0.2454, lr_0 = 4.5781e-04
Loss = 8.8097e-03, PNorm = 139.9670, GNorm = 0.1811, lr_0 = 4.5750e-04
Loss = 1.0383e-02, PNorm = 139.9893, GNorm = 0.5044, lr_0 = 4.5718e-04
Loss = 9.8175e-03, PNorm = 140.0078, GNorm = 0.3518, lr_0 = 4.5687e-04
Loss = 1.1379e-02, PNorm = 140.0286, GNorm = 0.3330, lr_0 = 4.5656e-04
Loss = 1.0823e-02, PNorm = 140.0532, GNorm = 0.1946, lr_0 = 4.5624e-04
Loss = 1.0300e-02, PNorm = 140.0799, GNorm = 0.4039, lr_0 = 4.5593e-04
Loss = 9.4579e-03, PNorm = 140.1082, GNorm = 0.1472, lr_0 = 4.5562e-04
Loss = 1.3734e-02, PNorm = 140.1291, GNorm = 0.1486, lr_0 = 4.5531e-04
Loss = 1.0996e-02, PNorm = 140.1489, GNorm = 0.5590, lr_0 = 4.5499e-04
Loss = 9.2777e-03, PNorm = 140.1704, GNorm = 0.1834, lr_0 = 4.5468e-04
Loss = 9.4530e-03, PNorm = 140.1930, GNorm = 0.1662, lr_0 = 4.5437e-04
Loss = 1.0567e-02, PNorm = 140.2185, GNorm = 0.1218, lr_0 = 4.5406e-04
Loss = 1.1342e-02, PNorm = 140.2451, GNorm = 0.3108, lr_0 = 4.5375e-04
Loss = 9.3006e-03, PNorm = 140.2736, GNorm = 0.4946, lr_0 = 4.5344e-04
Loss = 1.0842e-02, PNorm = 140.2990, GNorm = 0.2276, lr_0 = 4.5313e-04
Loss = 1.0788e-02, PNorm = 140.3253, GNorm = 0.6397, lr_0 = 4.5282e-04
Loss = 1.2139e-02, PNorm = 140.3495, GNorm = 0.4080, lr_0 = 4.5251e-04
Loss = 9.0204e-03, PNorm = 140.3719, GNorm = 0.1026, lr_0 = 4.5220e-04
Loss = 1.1564e-02, PNorm = 140.3995, GNorm = 0.2687, lr_0 = 4.5189e-04
Loss = 1.2159e-02, PNorm = 140.4297, GNorm = 0.1412, lr_0 = 4.5158e-04
Loss = 9.5521e-03, PNorm = 140.4559, GNorm = 0.0968, lr_0 = 4.5127e-04
Loss = 9.9454e-03, PNorm = 140.4795, GNorm = 0.3972, lr_0 = 4.5096e-04
Loss = 1.1584e-02, PNorm = 140.5061, GNorm = 0.0857, lr_0 = 4.5065e-04
Loss = 1.1002e-02, PNorm = 140.5285, GNorm = 0.1492, lr_0 = 4.5034e-04
Loss = 8.4586e-03, PNorm = 140.5498, GNorm = 0.2569, lr_0 = 4.5003e-04
Loss = 1.5791e-02, PNorm = 140.5718, GNorm = 0.1552, lr_0 = 4.4972e-04
Loss = 1.0599e-02, PNorm = 140.5930, GNorm = 0.6089, lr_0 = 4.4942e-04
Loss = 8.5686e-03, PNorm = 140.6168, GNorm = 0.1063, lr_0 = 4.4911e-04
Loss = 8.3660e-03, PNorm = 140.6410, GNorm = 0.2185, lr_0 = 4.4880e-04
Loss = 8.5089e-03, PNorm = 140.6613, GNorm = 0.1656, lr_0 = 4.4849e-04
Loss = 1.1401e-02, PNorm = 140.6832, GNorm = 0.2580, lr_0 = 4.4819e-04
Loss = 9.9996e-03, PNorm = 140.7089, GNorm = 0.2927, lr_0 = 4.4788e-04
Loss = 1.1899e-02, PNorm = 140.7365, GNorm = 0.5058, lr_0 = 4.4757e-04
Loss = 9.6862e-03, PNorm = 140.7664, GNorm = 0.2614, lr_0 = 4.4727e-04
Loss = 1.0706e-02, PNorm = 140.7880, GNorm = 0.3598, lr_0 = 4.4696e-04
Loss = 9.2897e-03, PNorm = 140.8069, GNorm = 0.1779, lr_0 = 4.4665e-04
Loss = 1.0003e-02, PNorm = 140.8292, GNorm = 0.1397, lr_0 = 4.4635e-04
Loss = 1.0463e-02, PNorm = 140.8590, GNorm = 0.2591, lr_0 = 4.4604e-04
Loss = 1.1658e-02, PNorm = 140.8886, GNorm = 0.1345, lr_0 = 4.4574e-04
Loss = 1.0807e-02, PNorm = 140.9144, GNorm = 0.3592, lr_0 = 4.4543e-04
Loss = 1.0792e-02, PNorm = 140.9352, GNorm = 0.4165, lr_0 = 4.4513e-04
Loss = 9.7961e-03, PNorm = 140.9621, GNorm = 0.3281, lr_0 = 4.4482e-04
Loss = 9.1173e-03, PNorm = 140.9893, GNorm = 0.1700, lr_0 = 4.4452e-04
Loss = 1.1151e-02, PNorm = 141.0130, GNorm = 0.1657, lr_0 = 4.4421e-04
Loss = 9.3468e-03, PNorm = 141.0367, GNorm = 0.4843, lr_0 = 4.4391e-04
Loss = 8.3479e-03, PNorm = 141.0601, GNorm = 0.2960, lr_0 = 4.4360e-04
Loss = 9.0223e-03, PNorm = 141.0874, GNorm = 0.1144, lr_0 = 4.4330e-04
Loss = 8.9235e-03, PNorm = 141.1112, GNorm = 0.2806, lr_0 = 4.4299e-04
Loss = 8.5626e-03, PNorm = 141.1300, GNorm = 0.2373, lr_0 = 4.4269e-04
Loss = 1.0761e-02, PNorm = 141.1536, GNorm = 0.3497, lr_0 = 4.4239e-04
Loss = 1.1641e-02, PNorm = 141.1793, GNorm = 0.3430, lr_0 = 4.4209e-04
Loss = 1.1029e-02, PNorm = 141.2030, GNorm = 0.3774, lr_0 = 4.4178e-04
Loss = 9.6876e-03, PNorm = 141.2294, GNorm = 0.1128, lr_0 = 4.4148e-04
Loss = 1.0523e-02, PNorm = 141.2574, GNorm = 0.2289, lr_0 = 4.4118e-04
Loss = 8.7018e-03, PNorm = 141.2792, GNorm = 0.2450, lr_0 = 4.4088e-04
Loss = 8.7422e-03, PNorm = 141.3029, GNorm = 0.3212, lr_0 = 4.4057e-04
Loss = 1.0356e-02, PNorm = 141.3220, GNorm = 0.2164, lr_0 = 4.4027e-04
Loss = 1.2114e-02, PNorm = 141.3456, GNorm = 0.2189, lr_0 = 4.3997e-04
Loss = 8.4771e-03, PNorm = 141.3685, GNorm = 0.2007, lr_0 = 4.3967e-04
Loss = 1.0863e-02, PNorm = 141.3898, GNorm = 0.2585, lr_0 = 4.3937e-04
Validation mae = 0.480147
Epoch 12
Loss = 9.1864e-03, PNorm = 141.4109, GNorm = 0.1208, lr_0 = 4.3907e-04
Loss = 9.6354e-03, PNorm = 141.4303, GNorm = 0.3725, lr_0 = 4.3877e-04
Loss = 8.5636e-03, PNorm = 141.4483, GNorm = 0.1950, lr_0 = 4.3846e-04
Loss = 7.4918e-03, PNorm = 141.4640, GNorm = 0.3267, lr_0 = 4.3816e-04
Loss = 8.2432e-03, PNorm = 141.4829, GNorm = 0.1395, lr_0 = 4.3786e-04
Loss = 9.4531e-03, PNorm = 141.4976, GNorm = 0.2107, lr_0 = 4.3756e-04
Loss = 9.5280e-03, PNorm = 141.5128, GNorm = 0.1942, lr_0 = 4.3726e-04
Loss = 1.0614e-02, PNorm = 141.5304, GNorm = 0.1305, lr_0 = 4.3696e-04
Loss = 7.7179e-03, PNorm = 141.5471, GNorm = 0.5326, lr_0 = 4.3667e-04
Loss = 7.9929e-03, PNorm = 141.5672, GNorm = 0.1205, lr_0 = 4.3637e-04
Loss = 8.5533e-03, PNorm = 141.5899, GNorm = 0.1469, lr_0 = 4.3607e-04
Loss = 8.9355e-03, PNorm = 141.6070, GNorm = 0.3510, lr_0 = 4.3577e-04
Loss = 9.3607e-03, PNorm = 141.6246, GNorm = 0.0879, lr_0 = 4.3547e-04
Loss = 8.8717e-03, PNorm = 141.6446, GNorm = 0.1191, lr_0 = 4.3517e-04
Loss = 8.6155e-03, PNorm = 141.6673, GNorm = 0.4208, lr_0 = 4.3487e-04
Loss = 9.9088e-03, PNorm = 141.6893, GNorm = 0.5892, lr_0 = 4.3458e-04
Loss = 1.1710e-02, PNorm = 141.7060, GNorm = 0.2996, lr_0 = 4.3428e-04
Loss = 8.3062e-03, PNorm = 141.7269, GNorm = 0.2540, lr_0 = 4.3398e-04
Loss = 8.9928e-03, PNorm = 141.7441, GNorm = 0.3418, lr_0 = 4.3368e-04
Loss = 9.1428e-03, PNorm = 141.7615, GNorm = 0.1444, lr_0 = 4.3339e-04
Loss = 7.2558e-03, PNorm = 141.7783, GNorm = 0.2119, lr_0 = 4.3309e-04
Loss = 7.1961e-03, PNorm = 141.7970, GNorm = 0.1323, lr_0 = 4.3279e-04
Loss = 8.5369e-03, PNorm = 141.8162, GNorm = 0.2516, lr_0 = 4.3250e-04
Loss = 9.3619e-03, PNorm = 141.8295, GNorm = 0.1267, lr_0 = 4.3220e-04
Loss = 8.2949e-03, PNorm = 141.8483, GNorm = 0.1511, lr_0 = 4.3190e-04
Loss = 7.1658e-03, PNorm = 141.8698, GNorm = 0.5091, lr_0 = 4.3161e-04
Loss = 1.0389e-02, PNorm = 141.8920, GNorm = 0.2907, lr_0 = 4.3131e-04
Loss = 1.1785e-02, PNorm = 141.9125, GNorm = 0.1746, lr_0 = 4.3102e-04
Loss = 1.2595e-02, PNorm = 141.9320, GNorm = 0.2540, lr_0 = 4.3072e-04
Loss = 9.0767e-03, PNorm = 141.9468, GNorm = 0.4482, lr_0 = 4.3043e-04
Loss = 9.0843e-03, PNorm = 141.9692, GNorm = 0.1800, lr_0 = 4.3013e-04
Loss = 8.3261e-03, PNorm = 141.9904, GNorm = 0.4956, lr_0 = 4.2984e-04
Loss = 8.4347e-03, PNorm = 142.0135, GNorm = 0.2309, lr_0 = 4.2954e-04
Loss = 8.5828e-03, PNorm = 142.0338, GNorm = 0.1113, lr_0 = 4.2925e-04
Loss = 7.6856e-03, PNorm = 142.0497, GNorm = 0.0855, lr_0 = 4.2895e-04
Loss = 8.6567e-03, PNorm = 142.0670, GNorm = 0.3263, lr_0 = 4.2866e-04
Loss = 8.1906e-03, PNorm = 142.0890, GNorm = 0.1014, lr_0 = 4.2837e-04
Loss = 9.4510e-03, PNorm = 142.1078, GNorm = 0.2178, lr_0 = 4.2807e-04
Loss = 9.8772e-03, PNorm = 142.1282, GNorm = 0.3809, lr_0 = 4.2778e-04
Loss = 8.9036e-03, PNorm = 142.1482, GNorm = 0.2117, lr_0 = 4.2749e-04
Loss = 7.2151e-03, PNorm = 142.1669, GNorm = 0.0773, lr_0 = 4.2719e-04
Loss = 8.3832e-03, PNorm = 142.1869, GNorm = 0.1717, lr_0 = 4.2690e-04
Loss = 9.3873e-03, PNorm = 142.2058, GNorm = 0.4716, lr_0 = 4.2661e-04
Loss = 1.1678e-02, PNorm = 142.2239, GNorm = 0.2880, lr_0 = 4.2632e-04
Loss = 7.9929e-03, PNorm = 142.2448, GNorm = 0.2726, lr_0 = 4.2602e-04
Loss = 6.8315e-03, PNorm = 142.2625, GNorm = 0.1821, lr_0 = 4.2573e-04
Loss = 9.7791e-03, PNorm = 142.2798, GNorm = 0.2358, lr_0 = 4.2544e-04
Loss = 8.5825e-03, PNorm = 142.3004, GNorm = 0.2189, lr_0 = 4.2515e-04
Loss = 8.0232e-03, PNorm = 142.3225, GNorm = 0.1816, lr_0 = 4.2486e-04
Loss = 7.0515e-03, PNorm = 142.3414, GNorm = 0.2308, lr_0 = 4.2457e-04
Loss = 8.0488e-03, PNorm = 142.3542, GNorm = 0.1524, lr_0 = 4.2428e-04
Loss = 7.5372e-03, PNorm = 142.3715, GNorm = 0.1102, lr_0 = 4.2399e-04
Loss = 7.4168e-03, PNorm = 142.3892, GNorm = 0.3252, lr_0 = 4.2370e-04
Loss = 1.1710e-02, PNorm = 142.4075, GNorm = 0.3067, lr_0 = 4.2340e-04
Loss = 1.1059e-02, PNorm = 142.4319, GNorm = 0.2562, lr_0 = 4.2311e-04
Loss = 7.1174e-03, PNorm = 142.4558, GNorm = 0.1686, lr_0 = 4.2283e-04
Loss = 6.9968e-03, PNorm = 142.4730, GNorm = 0.1072, lr_0 = 4.2254e-04
Loss = 8.9493e-03, PNorm = 142.4883, GNorm = 0.2353, lr_0 = 4.2225e-04
Loss = 7.9864e-03, PNorm = 142.5042, GNorm = 0.0776, lr_0 = 4.2196e-04
Loss = 7.9650e-03, PNorm = 142.5212, GNorm = 0.1233, lr_0 = 4.2167e-04
Loss = 8.4710e-03, PNorm = 142.5438, GNorm = 0.1861, lr_0 = 4.2138e-04
Loss = 8.4212e-03, PNorm = 142.5652, GNorm = 0.2341, lr_0 = 4.2109e-04
Loss = 9.7550e-03, PNorm = 142.5793, GNorm = 0.5851, lr_0 = 4.2080e-04
Loss = 8.2838e-03, PNorm = 142.5952, GNorm = 0.2202, lr_0 = 4.2051e-04
Loss = 7.8141e-03, PNorm = 142.6157, GNorm = 0.3066, lr_0 = 4.2023e-04
Loss = 7.2055e-03, PNorm = 142.6365, GNorm = 0.1544, lr_0 = 4.1994e-04
Loss = 7.3843e-03, PNorm = 142.6570, GNorm = 0.2471, lr_0 = 4.1965e-04
Loss = 8.1887e-03, PNorm = 142.6765, GNorm = 0.2528, lr_0 = 4.1936e-04
Loss = 1.2422e-02, PNorm = 142.6969, GNorm = 0.1432, lr_0 = 4.1907e-04
Loss = 7.6555e-03, PNorm = 142.7178, GNorm = 0.0916, lr_0 = 4.1879e-04
Loss = 7.0523e-03, PNorm = 142.7365, GNorm = 0.2909, lr_0 = 4.1850e-04
Loss = 7.3865e-03, PNorm = 142.7545, GNorm = 0.3545, lr_0 = 4.1821e-04
Loss = 7.7708e-03, PNorm = 142.7750, GNorm = 0.2221, lr_0 = 4.1793e-04
Loss = 6.9277e-03, PNorm = 142.7925, GNorm = 0.2289, lr_0 = 4.1764e-04
Loss = 7.4272e-03, PNorm = 142.8148, GNorm = 0.1310, lr_0 = 4.1736e-04
Loss = 7.2252e-03, PNorm = 142.8332, GNorm = 0.1039, lr_0 = 4.1707e-04
Loss = 7.9095e-03, PNorm = 142.8474, GNorm = 0.3180, lr_0 = 4.1678e-04
Loss = 9.1264e-03, PNorm = 142.8680, GNorm = 0.1442, lr_0 = 4.1650e-04
Loss = 8.2729e-03, PNorm = 142.8900, GNorm = 0.5643, lr_0 = 4.1621e-04
Loss = 7.5801e-03, PNorm = 142.9142, GNorm = 0.3054, lr_0 = 4.1593e-04
Loss = 8.0896e-03, PNorm = 142.9377, GNorm = 0.2790, lr_0 = 4.1564e-04
Loss = 8.8763e-03, PNorm = 142.9559, GNorm = 0.1582, lr_0 = 4.1536e-04
Loss = 8.2230e-03, PNorm = 142.9731, GNorm = 0.1445, lr_0 = 4.1507e-04
Loss = 6.9755e-03, PNorm = 142.9905, GNorm = 0.3267, lr_0 = 4.1479e-04
Loss = 8.6833e-03, PNorm = 143.0101, GNorm = 0.5050, lr_0 = 4.1450e-04
Loss = 1.0951e-02, PNorm = 143.0312, GNorm = 0.7260, lr_0 = 4.1422e-04
Loss = 8.9253e-03, PNorm = 143.0501, GNorm = 0.1515, lr_0 = 4.1394e-04
Loss = 7.1849e-03, PNorm = 143.0696, GNorm = 0.2357, lr_0 = 4.1365e-04
Loss = 8.8760e-03, PNorm = 143.0870, GNorm = 0.1562, lr_0 = 4.1337e-04
Loss = 7.9620e-03, PNorm = 143.1049, GNorm = 0.2122, lr_0 = 4.1309e-04
Loss = 8.9166e-03, PNorm = 143.1224, GNorm = 0.2988, lr_0 = 4.1280e-04
Loss = 8.3674e-03, PNorm = 143.1428, GNorm = 0.3183, lr_0 = 4.1252e-04
Loss = 8.4777e-03, PNorm = 143.1581, GNorm = 0.2567, lr_0 = 4.1224e-04
Loss = 9.8015e-03, PNorm = 143.1758, GNorm = 0.2471, lr_0 = 4.1196e-04
Loss = 8.1232e-03, PNorm = 143.1938, GNorm = 0.1564, lr_0 = 4.1167e-04
Loss = 1.0726e-02, PNorm = 143.2131, GNorm = 0.1745, lr_0 = 4.1139e-04
Loss = 1.0792e-02, PNorm = 143.2406, GNorm = 0.6949, lr_0 = 4.1111e-04
Loss = 9.0563e-03, PNorm = 143.2637, GNorm = 0.2089, lr_0 = 4.1083e-04
Loss = 7.7267e-03, PNorm = 143.2882, GNorm = 0.4413, lr_0 = 4.1055e-04
Loss = 9.6067e-03, PNorm = 143.3127, GNorm = 0.2523, lr_0 = 4.1027e-04
Loss = 1.0354e-02, PNorm = 143.3344, GNorm = 0.4690, lr_0 = 4.0998e-04
Loss = 1.0449e-02, PNorm = 143.3587, GNorm = 0.1518, lr_0 = 4.0970e-04
Loss = 9.2162e-03, PNorm = 143.3808, GNorm = 0.4969, lr_0 = 4.0942e-04
Loss = 8.2533e-03, PNorm = 143.4010, GNorm = 0.1193, lr_0 = 4.0914e-04
Loss = 8.2994e-03, PNorm = 143.4184, GNorm = 0.2601, lr_0 = 4.0886e-04
Loss = 7.5917e-03, PNorm = 143.4364, GNorm = 0.1036, lr_0 = 4.0858e-04
Loss = 8.7253e-03, PNorm = 143.4553, GNorm = 0.5045, lr_0 = 4.0830e-04
Loss = 7.5358e-03, PNorm = 143.4760, GNorm = 0.0992, lr_0 = 4.0802e-04
Loss = 1.1677e-02, PNorm = 143.4970, GNorm = 0.3978, lr_0 = 4.0774e-04
Loss = 7.3203e-03, PNorm = 143.5215, GNorm = 0.2628, lr_0 = 4.0746e-04
Loss = 9.5889e-03, PNorm = 143.5398, GNorm = 0.1447, lr_0 = 4.0718e-04
Loss = 7.6003e-03, PNorm = 143.5567, GNorm = 0.1913, lr_0 = 4.0691e-04
Loss = 8.1148e-03, PNorm = 143.5802, GNorm = 0.3281, lr_0 = 4.0663e-04
Loss = 7.7838e-03, PNorm = 143.6005, GNorm = 0.5193, lr_0 = 4.0635e-04
Loss = 9.0613e-03, PNorm = 143.6225, GNorm = 0.2787, lr_0 = 4.0607e-04
Loss = 8.2221e-03, PNorm = 143.6436, GNorm = 0.1079, lr_0 = 4.0579e-04
Loss = 1.0659e-02, PNorm = 143.6643, GNorm = 0.2026, lr_0 = 4.0551e-04
Loss = 8.1229e-03, PNorm = 143.6842, GNorm = 0.4635, lr_0 = 4.0524e-04
Loss = 8.0265e-03, PNorm = 143.7075, GNorm = 0.3599, lr_0 = 4.0496e-04
Loss = 1.0713e-02, PNorm = 143.7321, GNorm = 0.4911, lr_0 = 4.0468e-04
Validation mae = 0.480816
Epoch 13
Loss = 8.9366e-03, PNorm = 143.7579, GNorm = 0.2283, lr_0 = 4.0440e-04
Loss = 7.5187e-03, PNorm = 143.7782, GNorm = 0.2788, lr_0 = 4.0413e-04
Loss = 7.7365e-03, PNorm = 143.7951, GNorm = 0.3902, lr_0 = 4.0385e-04
Loss = 7.0103e-03, PNorm = 143.8077, GNorm = 0.2602, lr_0 = 4.0357e-04
Loss = 7.5633e-03, PNorm = 143.8206, GNorm = 0.1971, lr_0 = 4.0330e-04
Loss = 7.6385e-03, PNorm = 143.8323, GNorm = 0.2796, lr_0 = 4.0302e-04
Loss = 6.3051e-03, PNorm = 143.8476, GNorm = 0.2042, lr_0 = 4.0274e-04
Loss = 7.6105e-03, PNorm = 143.8585, GNorm = 0.2327, lr_0 = 4.0247e-04
Loss = 7.0906e-03, PNorm = 143.8745, GNorm = 0.1795, lr_0 = 4.0219e-04
Loss = 6.5915e-03, PNorm = 143.8885, GNorm = 0.1990, lr_0 = 4.0192e-04
Loss = 1.0210e-02, PNorm = 143.9032, GNorm = 0.3129, lr_0 = 4.0164e-04
Loss = 8.1636e-03, PNorm = 143.9168, GNorm = 0.2060, lr_0 = 4.0137e-04
Loss = 7.0030e-03, PNorm = 143.9304, GNorm = 0.1553, lr_0 = 4.0109e-04
Loss = 6.7943e-03, PNorm = 143.9396, GNorm = 0.3763, lr_0 = 4.0082e-04
Loss = 7.2799e-03, PNorm = 143.9502, GNorm = 0.1599, lr_0 = 4.0054e-04
Loss = 6.3692e-03, PNorm = 143.9657, GNorm = 0.3272, lr_0 = 4.0027e-04
Loss = 7.3894e-03, PNorm = 143.9821, GNorm = 0.1691, lr_0 = 3.9999e-04
Loss = 6.3654e-03, PNorm = 143.9957, GNorm = 0.1830, lr_0 = 3.9972e-04
Loss = 6.5812e-03, PNorm = 144.0116, GNorm = 0.2608, lr_0 = 3.9945e-04
Loss = 7.2309e-03, PNorm = 144.0241, GNorm = 0.3036, lr_0 = 3.9917e-04
Loss = 7.1924e-03, PNorm = 144.0340, GNorm = 0.2019, lr_0 = 3.9890e-04
Loss = 8.4191e-03, PNorm = 144.0478, GNorm = 0.1234, lr_0 = 3.9863e-04
Loss = 7.2725e-03, PNorm = 144.0648, GNorm = 0.3061, lr_0 = 3.9835e-04
Loss = 7.6577e-03, PNorm = 144.0805, GNorm = 0.1202, lr_0 = 3.9808e-04
Loss = 7.2144e-03, PNorm = 144.0986, GNorm = 0.1841, lr_0 = 3.9781e-04
Loss = 8.3116e-03, PNorm = 144.1149, GNorm = 0.2206, lr_0 = 3.9753e-04
Loss = 7.4109e-03, PNorm = 144.1296, GNorm = 0.2400, lr_0 = 3.9726e-04
Loss = 7.1843e-03, PNorm = 144.1458, GNorm = 0.0729, lr_0 = 3.9699e-04
Loss = 5.1105e-03, PNorm = 144.1564, GNorm = 0.2332, lr_0 = 3.9672e-04
Loss = 7.3703e-03, PNorm = 144.1713, GNorm = 0.2597, lr_0 = 3.9645e-04
Loss = 1.0232e-02, PNorm = 144.1864, GNorm = 0.6533, lr_0 = 3.9617e-04
Loss = 6.0431e-03, PNorm = 144.2028, GNorm = 0.1301, lr_0 = 3.9590e-04
Loss = 7.9441e-03, PNorm = 144.2188, GNorm = 0.2490, lr_0 = 3.9563e-04
Loss = 7.4089e-03, PNorm = 144.2351, GNorm = 0.2283, lr_0 = 3.9536e-04
Loss = 5.4442e-03, PNorm = 144.2520, GNorm = 0.0952, lr_0 = 3.9509e-04
Loss = 6.5563e-03, PNorm = 144.2662, GNorm = 0.2515, lr_0 = 3.9482e-04
Loss = 8.5823e-03, PNorm = 144.2823, GNorm = 0.1179, lr_0 = 3.9455e-04
Loss = 6.1841e-03, PNorm = 144.2973, GNorm = 0.6911, lr_0 = 3.9428e-04
Loss = 6.5033e-03, PNorm = 144.3137, GNorm = 0.1793, lr_0 = 3.9401e-04
Loss = 6.4364e-03, PNorm = 144.3294, GNorm = 0.1504, lr_0 = 3.9374e-04
Loss = 7.8336e-03, PNorm = 144.3406, GNorm = 0.3994, lr_0 = 3.9347e-04
Loss = 7.6751e-03, PNorm = 144.3547, GNorm = 0.2622, lr_0 = 3.9320e-04
Loss = 6.5298e-03, PNorm = 144.3725, GNorm = 0.2077, lr_0 = 3.9293e-04
Loss = 6.9019e-03, PNorm = 144.3867, GNorm = 0.1920, lr_0 = 3.9266e-04
Loss = 6.0627e-03, PNorm = 144.4017, GNorm = 0.2250, lr_0 = 3.9239e-04
Loss = 6.4832e-03, PNorm = 144.4183, GNorm = 0.1282, lr_0 = 3.9212e-04
Loss = 6.4012e-03, PNorm = 144.4380, GNorm = 0.1742, lr_0 = 3.9185e-04
Loss = 6.7305e-03, PNorm = 144.4591, GNorm = 0.4111, lr_0 = 3.9159e-04
Loss = 5.4656e-03, PNorm = 144.4756, GNorm = 0.1357, lr_0 = 3.9132e-04
Loss = 7.1509e-03, PNorm = 144.4864, GNorm = 0.2238, lr_0 = 3.9105e-04
Loss = 6.7612e-03, PNorm = 144.5017, GNorm = 0.3439, lr_0 = 3.9078e-04
Loss = 7.3980e-03, PNorm = 144.5172, GNorm = 0.1452, lr_0 = 3.9051e-04
Loss = 7.9041e-03, PNorm = 144.5338, GNorm = 0.2130, lr_0 = 3.9025e-04
Loss = 8.0468e-03, PNorm = 144.5507, GNorm = 0.3086, lr_0 = 3.8998e-04
Loss = 6.8065e-03, PNorm = 144.5635, GNorm = 0.4792, lr_0 = 3.8971e-04
Loss = 6.4504e-03, PNorm = 144.5777, GNorm = 0.2449, lr_0 = 3.8945e-04
Loss = 1.1707e-02, PNorm = 144.5914, GNorm = 0.1253, lr_0 = 3.8918e-04
Loss = 7.8759e-03, PNorm = 144.6065, GNorm = 0.2210, lr_0 = 3.8891e-04
Loss = 6.8175e-03, PNorm = 144.6228, GNorm = 0.3902, lr_0 = 3.8865e-04
Loss = 6.0046e-03, PNorm = 144.6417, GNorm = 0.2116, lr_0 = 3.8838e-04
Loss = 8.1389e-03, PNorm = 144.6581, GNorm = 0.1086, lr_0 = 3.8811e-04
Loss = 7.4428e-03, PNorm = 144.6755, GNorm = 0.2072, lr_0 = 3.8785e-04
Loss = 6.3042e-03, PNorm = 144.6913, GNorm = 0.1128, lr_0 = 3.8758e-04
Loss = 6.4121e-03, PNorm = 144.7025, GNorm = 0.2738, lr_0 = 3.8732e-04
Loss = 6.7469e-03, PNorm = 144.7205, GNorm = 0.1457, lr_0 = 3.8705e-04
Loss = 7.4265e-03, PNorm = 144.7378, GNorm = 0.1580, lr_0 = 3.8679e-04
Loss = 9.7436e-03, PNorm = 144.7586, GNorm = 0.1725, lr_0 = 3.8652e-04
Loss = 6.8643e-03, PNorm = 144.7752, GNorm = 0.0756, lr_0 = 3.8626e-04
Loss = 7.6409e-03, PNorm = 144.7885, GNorm = 0.1364, lr_0 = 3.8599e-04
Loss = 7.6028e-03, PNorm = 144.8032, GNorm = 0.1020, lr_0 = 3.8573e-04
Loss = 7.5677e-03, PNorm = 144.8220, GNorm = 0.4960, lr_0 = 3.8546e-04
Loss = 7.6889e-03, PNorm = 144.8409, GNorm = 0.6045, lr_0 = 3.8520e-04
Loss = 8.5362e-03, PNorm = 144.8617, GNorm = 0.2592, lr_0 = 3.8493e-04
Loss = 8.8187e-03, PNorm = 144.8859, GNorm = 0.1572, lr_0 = 3.8467e-04
Loss = 7.0290e-03, PNorm = 144.9061, GNorm = 0.4582, lr_0 = 3.8441e-04
Loss = 7.1790e-03, PNorm = 144.9226, GNorm = 0.1967, lr_0 = 3.8414e-04
Loss = 6.4314e-03, PNorm = 144.9386, GNorm = 0.1896, lr_0 = 3.8388e-04
Loss = 5.7453e-03, PNorm = 144.9565, GNorm = 0.1712, lr_0 = 3.8362e-04
Loss = 7.2887e-03, PNorm = 144.9724, GNorm = 0.1541, lr_0 = 3.8336e-04
Loss = 6.5949e-03, PNorm = 144.9878, GNorm = 0.1312, lr_0 = 3.8309e-04
Loss = 5.4480e-03, PNorm = 145.0024, GNorm = 0.3961, lr_0 = 3.8283e-04
Loss = 9.1934e-03, PNorm = 145.0166, GNorm = 0.3405, lr_0 = 3.8257e-04
Loss = 6.9216e-03, PNorm = 145.0352, GNorm = 0.1157, lr_0 = 3.8231e-04
Loss = 7.1191e-03, PNorm = 145.0559, GNorm = 0.1661, lr_0 = 3.8204e-04
Loss = 7.0375e-03, PNorm = 145.0737, GNorm = 0.1979, lr_0 = 3.8178e-04
Loss = 8.6088e-03, PNorm = 145.0921, GNorm = 0.4524, lr_0 = 3.8152e-04
Loss = 6.9544e-03, PNorm = 145.1060, GNorm = 0.2730, lr_0 = 3.8126e-04
Loss = 7.1230e-03, PNorm = 145.1288, GNorm = 0.6859, lr_0 = 3.8100e-04
Loss = 5.3987e-03, PNorm = 145.1487, GNorm = 0.1762, lr_0 = 3.8074e-04
Loss = 1.2035e-02, PNorm = 145.1603, GNorm = 0.3433, lr_0 = 3.8048e-04
Loss = 8.1410e-03, PNorm = 145.1719, GNorm = 0.4139, lr_0 = 3.8022e-04
Loss = 7.3577e-03, PNorm = 145.1915, GNorm = 0.2989, lr_0 = 3.7995e-04
Loss = 6.3774e-03, PNorm = 145.2094, GNorm = 0.1124, lr_0 = 3.7969e-04
Loss = 7.0331e-03, PNorm = 145.2246, GNorm = 0.2299, lr_0 = 3.7943e-04
Loss = 7.6041e-03, PNorm = 145.2449, GNorm = 0.2905, lr_0 = 3.7917e-04
Loss = 8.7860e-03, PNorm = 145.2611, GNorm = 0.5011, lr_0 = 3.7891e-04
Loss = 9.0505e-03, PNorm = 145.2758, GNorm = 0.3082, lr_0 = 3.7866e-04
Loss = 8.4966e-03, PNorm = 145.2965, GNorm = 0.1840, lr_0 = 3.7840e-04
Loss = 6.1093e-03, PNorm = 145.3184, GNorm = 0.2256, lr_0 = 3.7814e-04
Loss = 6.5643e-03, PNorm = 145.3375, GNorm = 0.0820, lr_0 = 3.7788e-04
Loss = 9.8730e-03, PNorm = 145.3569, GNorm = 0.1016, lr_0 = 3.7762e-04
Loss = 6.5373e-03, PNorm = 145.3731, GNorm = 0.1478, lr_0 = 3.7736e-04
Loss = 7.7883e-03, PNorm = 145.3887, GNorm = 0.2163, lr_0 = 3.7710e-04
Loss = 7.4302e-03, PNorm = 145.4032, GNorm = 0.1544, lr_0 = 3.7684e-04
Loss = 8.2164e-03, PNorm = 145.4162, GNorm = 0.1813, lr_0 = 3.7659e-04
Loss = 5.7312e-03, PNorm = 145.4323, GNorm = 0.1820, lr_0 = 3.7633e-04
Loss = 9.1902e-03, PNorm = 145.4492, GNorm = 0.1356, lr_0 = 3.7607e-04
Loss = 8.0196e-03, PNorm = 145.4684, GNorm = 0.1470, lr_0 = 3.7581e-04
Loss = 7.1526e-03, PNorm = 145.4879, GNorm = 0.1080, lr_0 = 3.7555e-04
Loss = 7.2706e-03, PNorm = 145.5047, GNorm = 0.2138, lr_0 = 3.7530e-04
Loss = 6.5964e-03, PNorm = 145.5230, GNorm = 0.1705, lr_0 = 3.7504e-04
Loss = 6.5531e-03, PNorm = 145.5357, GNorm = 0.3026, lr_0 = 3.7478e-04
Loss = 6.5192e-03, PNorm = 145.5509, GNorm = 0.0991, lr_0 = 3.7453e-04
Loss = 7.0435e-03, PNorm = 145.5714, GNorm = 0.1529, lr_0 = 3.7427e-04
Loss = 6.9826e-03, PNorm = 145.5845, GNorm = 0.1088, lr_0 = 3.7401e-04
Loss = 5.9887e-03, PNorm = 145.6024, GNorm = 0.2002, lr_0 = 3.7376e-04
Loss = 6.4404e-03, PNorm = 145.6214, GNorm = 0.2469, lr_0 = 3.7350e-04
Loss = 6.6639e-03, PNorm = 145.6360, GNorm = 0.2163, lr_0 = 3.7325e-04
Loss = 7.3255e-03, PNorm = 145.6496, GNorm = 0.1108, lr_0 = 3.7299e-04
Loss = 8.5752e-03, PNorm = 145.6647, GNorm = 0.1995, lr_0 = 3.7273e-04
Validation mae = 0.478339
Epoch 14
Loss = 5.6150e-03, PNorm = 145.6790, GNorm = 0.2689, lr_0 = 3.7248e-04
Loss = 6.5994e-03, PNorm = 145.6885, GNorm = 0.2455, lr_0 = 3.7222e-04
Loss = 6.2223e-03, PNorm = 145.6968, GNorm = 0.0780, lr_0 = 3.7197e-04
Loss = 6.6648e-03, PNorm = 145.7104, GNorm = 0.3194, lr_0 = 3.7171e-04
Loss = 7.5217e-03, PNorm = 145.7247, GNorm = 0.5627, lr_0 = 3.7146e-04
Loss = 7.1834e-03, PNorm = 145.7370, GNorm = 0.1573, lr_0 = 3.7120e-04
Loss = 5.8012e-03, PNorm = 145.7542, GNorm = 0.1720, lr_0 = 3.7095e-04
Loss = 5.8485e-03, PNorm = 145.7683, GNorm = 0.0897, lr_0 = 3.7070e-04
Loss = 6.0933e-03, PNorm = 145.7794, GNorm = 0.2630, lr_0 = 3.7044e-04
Loss = 6.1857e-03, PNorm = 145.7924, GNorm = 0.3580, lr_0 = 3.7019e-04
Loss = 7.6344e-03, PNorm = 145.8028, GNorm = 0.2813, lr_0 = 3.6993e-04
Loss = 7.1799e-03, PNorm = 145.8155, GNorm = 0.2727, lr_0 = 3.6968e-04
Loss = 6.4609e-03, PNorm = 145.8272, GNorm = 0.4859, lr_0 = 3.6943e-04
Loss = 7.0090e-03, PNorm = 145.8426, GNorm = 0.1486, lr_0 = 3.6917e-04
Loss = 5.6851e-03, PNorm = 145.8592, GNorm = 0.1506, lr_0 = 3.6892e-04
Loss = 6.5695e-03, PNorm = 145.8738, GNorm = 0.0859, lr_0 = 3.6867e-04
Loss = 6.8600e-03, PNorm = 145.8862, GNorm = 0.1433, lr_0 = 3.6842e-04
Loss = 6.1844e-03, PNorm = 145.9003, GNorm = 0.3523, lr_0 = 3.6816e-04
Loss = 5.4596e-03, PNorm = 145.9122, GNorm = 0.1769, lr_0 = 3.6791e-04
Loss = 4.9889e-03, PNorm = 145.9240, GNorm = 0.1740, lr_0 = 3.6766e-04
Loss = 5.2363e-03, PNorm = 145.9356, GNorm = 0.2996, lr_0 = 3.6741e-04
Loss = 7.4597e-03, PNorm = 145.9480, GNorm = 0.3253, lr_0 = 3.6716e-04
Loss = 7.4929e-03, PNorm = 145.9580, GNorm = 0.2111, lr_0 = 3.6690e-04
Loss = 5.4804e-03, PNorm = 145.9684, GNorm = 0.0769, lr_0 = 3.6665e-04
Loss = 7.9596e-03, PNorm = 145.9806, GNorm = 0.1076, lr_0 = 3.6640e-04
Loss = 6.6354e-03, PNorm = 145.9916, GNorm = 0.2819, lr_0 = 3.6615e-04
Loss = 6.3524e-03, PNorm = 146.0027, GNorm = 0.2933, lr_0 = 3.6590e-04
Loss = 5.1253e-03, PNorm = 146.0159, GNorm = 0.1191, lr_0 = 3.6565e-04
Loss = 5.5968e-03, PNorm = 146.0270, GNorm = 0.1383, lr_0 = 3.6540e-04
Loss = 5.7160e-03, PNorm = 146.0403, GNorm = 0.1232, lr_0 = 3.6515e-04
Loss = 7.9648e-03, PNorm = 146.0553, GNorm = 0.2856, lr_0 = 3.6490e-04
Loss = 5.6409e-03, PNorm = 146.0687, GNorm = 0.0597, lr_0 = 3.6465e-04
Loss = 5.5113e-03, PNorm = 146.0813, GNorm = 0.2741, lr_0 = 3.6440e-04
Loss = 5.6509e-03, PNorm = 146.0944, GNorm = 0.4114, lr_0 = 3.6415e-04
Loss = 6.3240e-03, PNorm = 146.1052, GNorm = 0.2356, lr_0 = 3.6390e-04
Loss = 5.8788e-03, PNorm = 146.1179, GNorm = 0.2398, lr_0 = 3.6365e-04
Loss = 5.8974e-03, PNorm = 146.1323, GNorm = 0.1027, lr_0 = 3.6340e-04
Loss = 5.2324e-03, PNorm = 146.1448, GNorm = 0.2767, lr_0 = 3.6315e-04
Loss = 5.7344e-03, PNorm = 146.1552, GNorm = 0.3071, lr_0 = 3.6290e-04
Loss = 6.2508e-03, PNorm = 146.1677, GNorm = 0.2899, lr_0 = 3.6266e-04
Loss = 6.5135e-03, PNorm = 146.1814, GNorm = 0.1469, lr_0 = 3.6241e-04
Loss = 6.5546e-03, PNorm = 146.1971, GNorm = 0.1244, lr_0 = 3.6216e-04
Loss = 6.4803e-03, PNorm = 146.2094, GNorm = 0.1448, lr_0 = 3.6191e-04
Loss = 6.8442e-03, PNorm = 146.2228, GNorm = 0.2037, lr_0 = 3.6166e-04
Loss = 6.0528e-03, PNorm = 146.2373, GNorm = 0.1141, lr_0 = 3.6141e-04
Loss = 5.8783e-03, PNorm = 146.2543, GNorm = 0.1185, lr_0 = 3.6117e-04
Loss = 1.0704e-02, PNorm = 146.2673, GNorm = 0.2861, lr_0 = 3.6092e-04
Loss = 6.3528e-03, PNorm = 146.2823, GNorm = 0.1345, lr_0 = 3.6067e-04
Loss = 7.0060e-03, PNorm = 146.2955, GNorm = 0.1780, lr_0 = 3.6043e-04
Loss = 6.4518e-03, PNorm = 146.3083, GNorm = 0.1792, lr_0 = 3.6018e-04
Loss = 8.7145e-03, PNorm = 146.3207, GNorm = 0.4653, lr_0 = 3.5993e-04
Loss = 4.9727e-03, PNorm = 146.3355, GNorm = 0.0761, lr_0 = 3.5969e-04
Loss = 5.1267e-03, PNorm = 146.3491, GNorm = 0.4070, lr_0 = 3.5944e-04
Loss = 1.0939e-02, PNorm = 146.3642, GNorm = 0.3594, lr_0 = 3.5919e-04
Loss = 5.7907e-03, PNorm = 146.3786, GNorm = 0.2741, lr_0 = 3.5895e-04
Loss = 6.0443e-03, PNorm = 146.3941, GNorm = 0.1990, lr_0 = 3.5870e-04
Loss = 6.8258e-03, PNorm = 146.4073, GNorm = 0.1327, lr_0 = 3.5845e-04
Loss = 5.9783e-03, PNorm = 146.4201, GNorm = 0.1160, lr_0 = 3.5821e-04
Loss = 5.6645e-03, PNorm = 146.4378, GNorm = 0.2916, lr_0 = 3.5796e-04
Loss = 5.5410e-03, PNorm = 146.4536, GNorm = 0.1486, lr_0 = 3.5772e-04
Loss = 6.5403e-03, PNorm = 146.4684, GNorm = 0.3599, lr_0 = 3.5747e-04
Loss = 6.5401e-03, PNorm = 146.4813, GNorm = 0.1852, lr_0 = 3.5723e-04
Loss = 6.2862e-03, PNorm = 146.4924, GNorm = 0.1032, lr_0 = 3.5698e-04
Loss = 5.6453e-03, PNorm = 146.5073, GNorm = 0.1763, lr_0 = 3.5674e-04
Loss = 5.9160e-03, PNorm = 146.5215, GNorm = 0.1269, lr_0 = 3.5650e-04
Loss = 4.6098e-03, PNorm = 146.5341, GNorm = 0.2097, lr_0 = 3.5625e-04
Loss = 6.0921e-03, PNorm = 146.5481, GNorm = 0.1680, lr_0 = 3.5601e-04
Loss = 6.4340e-03, PNorm = 146.5646, GNorm = 0.1849, lr_0 = 3.5576e-04
Loss = 5.2526e-03, PNorm = 146.5787, GNorm = 0.2546, lr_0 = 3.5552e-04
Loss = 7.0219e-03, PNorm = 146.5933, GNorm = 0.2012, lr_0 = 3.5528e-04
Loss = 8.9480e-03, PNorm = 146.6095, GNorm = 0.4108, lr_0 = 3.5503e-04
Loss = 6.6243e-03, PNorm = 146.6281, GNorm = 0.3072, lr_0 = 3.5479e-04
Loss = 5.4333e-03, PNorm = 146.6448, GNorm = 0.0946, lr_0 = 3.5455e-04
Loss = 5.5368e-03, PNorm = 146.6581, GNorm = 0.1382, lr_0 = 3.5430e-04
Loss = 5.7369e-03, PNorm = 146.6695, GNorm = 0.1997, lr_0 = 3.5406e-04
Loss = 7.0454e-03, PNorm = 146.6799, GNorm = 0.0690, lr_0 = 3.5382e-04
Loss = 5.8559e-03, PNorm = 146.6957, GNorm = 0.3817, lr_0 = 3.5358e-04
Loss = 6.3445e-03, PNorm = 146.7149, GNorm = 0.3699, lr_0 = 3.5333e-04
Loss = 5.5242e-03, PNorm = 146.7319, GNorm = 0.2510, lr_0 = 3.5309e-04
Loss = 5.5353e-03, PNorm = 146.7470, GNorm = 0.2005, lr_0 = 3.5285e-04
Loss = 6.4973e-03, PNorm = 146.7647, GNorm = 0.1693, lr_0 = 3.5261e-04
Loss = 7.0244e-03, PNorm = 146.7816, GNorm = 0.1678, lr_0 = 3.5237e-04
Loss = 5.3756e-03, PNorm = 146.7951, GNorm = 0.2836, lr_0 = 3.5212e-04
Loss = 6.8754e-03, PNorm = 146.8074, GNorm = 0.2301, lr_0 = 3.5188e-04
Loss = 5.0537e-03, PNorm = 146.8207, GNorm = 0.1318, lr_0 = 3.5164e-04
Loss = 6.0554e-03, PNorm = 146.8305, GNorm = 0.1938, lr_0 = 3.5140e-04
Loss = 6.2869e-03, PNorm = 146.8443, GNorm = 0.5137, lr_0 = 3.5116e-04
Loss = 7.8274e-03, PNorm = 146.8598, GNorm = 0.3234, lr_0 = 3.5092e-04
Loss = 8.3110e-03, PNorm = 146.8763, GNorm = 0.2167, lr_0 = 3.5068e-04
Loss = 6.2764e-03, PNorm = 146.8930, GNorm = 0.3610, lr_0 = 3.5044e-04
Loss = 7.1060e-03, PNorm = 146.9099, GNorm = 0.0688, lr_0 = 3.5020e-04
Loss = 5.3011e-03, PNorm = 146.9245, GNorm = 0.1636, lr_0 = 3.4996e-04
Loss = 5.9502e-03, PNorm = 146.9428, GNorm = 0.0794, lr_0 = 3.4972e-04
Loss = 7.9299e-03, PNorm = 146.9576, GNorm = 0.3198, lr_0 = 3.4948e-04
Loss = 6.5697e-03, PNorm = 146.9723, GNorm = 0.1467, lr_0 = 3.4924e-04
Loss = 5.3604e-03, PNorm = 146.9866, GNorm = 0.2154, lr_0 = 3.4900e-04
Loss = 6.2261e-03, PNorm = 146.9993, GNorm = 0.1852, lr_0 = 3.4876e-04
Loss = 7.0683e-03, PNorm = 147.0118, GNorm = 0.0900, lr_0 = 3.4852e-04
Loss = 5.2541e-03, PNorm = 147.0250, GNorm = 0.4090, lr_0 = 3.4828e-04
Loss = 6.9536e-03, PNorm = 147.0423, GNorm = 0.2258, lr_0 = 3.4805e-04
Loss = 5.5064e-03, PNorm = 147.0545, GNorm = 0.0995, lr_0 = 3.4781e-04
Loss = 6.5796e-03, PNorm = 147.0645, GNorm = 0.1746, lr_0 = 3.4757e-04
Loss = 6.7536e-03, PNorm = 147.0781, GNorm = 0.2917, lr_0 = 3.4733e-04
Loss = 9.1110e-03, PNorm = 147.0930, GNorm = 0.1117, lr_0 = 3.4709e-04
Loss = 6.0314e-03, PNorm = 147.1051, GNorm = 0.2674, lr_0 = 3.4686e-04
Loss = 8.2840e-03, PNorm = 147.1145, GNorm = 0.1494, lr_0 = 3.4662e-04
Loss = 8.1982e-03, PNorm = 147.1281, GNorm = 0.2618, lr_0 = 3.4638e-04
Loss = 5.0338e-03, PNorm = 147.1405, GNorm = 0.1606, lr_0 = 3.4614e-04
Loss = 5.2975e-03, PNorm = 147.1547, GNorm = 0.1970, lr_0 = 3.4591e-04
Loss = 5.9015e-03, PNorm = 147.1717, GNorm = 0.1682, lr_0 = 3.4567e-04
Loss = 6.5677e-03, PNorm = 147.1901, GNorm = 0.1208, lr_0 = 3.4543e-04
Loss = 5.0539e-03, PNorm = 147.2064, GNorm = 0.1880, lr_0 = 3.4520e-04
Loss = 5.4292e-03, PNorm = 147.2209, GNorm = 0.2782, lr_0 = 3.4496e-04
Loss = 5.9125e-03, PNorm = 147.2382, GNorm = 0.1958, lr_0 = 3.4472e-04
Loss = 6.2980e-03, PNorm = 147.2572, GNorm = 0.0949, lr_0 = 3.4449e-04
Loss = 6.0733e-03, PNorm = 147.2732, GNorm = 0.1466, lr_0 = 3.4425e-04
Loss = 5.8766e-03, PNorm = 147.2855, GNorm = 0.2715, lr_0 = 3.4402e-04
Loss = 5.8042e-03, PNorm = 147.3002, GNorm = 0.1907, lr_0 = 3.4378e-04
Loss = 5.5175e-03, PNorm = 147.3123, GNorm = 0.1486, lr_0 = 3.4354e-04
Loss = 4.4458e-03, PNorm = 147.3260, GNorm = 0.1184, lr_0 = 3.4331e-04
Validation mae = 0.478438
Epoch 15
Loss = 5.8735e-03, PNorm = 147.3402, GNorm = 0.2495, lr_0 = 3.4307e-04
Loss = 4.6469e-03, PNorm = 147.3505, GNorm = 0.0856, lr_0 = 3.4284e-04
Loss = 6.1691e-03, PNorm = 147.3606, GNorm = 0.0822, lr_0 = 3.4260e-04
Loss = 4.8304e-03, PNorm = 147.3711, GNorm = 0.1527, lr_0 = 3.4237e-04
Loss = 5.2063e-03, PNorm = 147.3842, GNorm = 0.3450, lr_0 = 3.4213e-04
Loss = 5.5876e-03, PNorm = 147.3947, GNorm = 0.1020, lr_0 = 3.4190e-04
Loss = 5.9634e-03, PNorm = 147.4047, GNorm = 0.4997, lr_0 = 3.4167e-04
Loss = 5.2571e-03, PNorm = 147.4123, GNorm = 0.1446, lr_0 = 3.4143e-04
Loss = 6.1683e-03, PNorm = 147.4236, GNorm = 0.2163, lr_0 = 3.4120e-04
Loss = 5.4018e-03, PNorm = 147.4326, GNorm = 0.1172, lr_0 = 3.4096e-04
Loss = 5.9604e-03, PNorm = 147.4412, GNorm = 0.3408, lr_0 = 3.4073e-04
Loss = 6.6387e-03, PNorm = 147.4554, GNorm = 0.2632, lr_0 = 3.4050e-04
Loss = 5.8405e-03, PNorm = 147.4643, GNorm = 0.3674, lr_0 = 3.4026e-04
Loss = 7.8512e-03, PNorm = 147.4717, GNorm = 0.5443, lr_0 = 3.4003e-04
Loss = 5.0400e-03, PNorm = 147.4836, GNorm = 0.1147, lr_0 = 3.3980e-04
Loss = 5.1072e-03, PNorm = 147.4924, GNorm = 0.1345, lr_0 = 3.3956e-04
Loss = 4.5377e-03, PNorm = 147.5015, GNorm = 0.1366, lr_0 = 3.3933e-04
Loss = 4.7017e-03, PNorm = 147.5152, GNorm = 0.0612, lr_0 = 3.3910e-04
Loss = 5.5377e-03, PNorm = 147.5274, GNorm = 0.2231, lr_0 = 3.3887e-04
Loss = 5.4730e-03, PNorm = 147.5391, GNorm = 0.1377, lr_0 = 3.3864e-04
Loss = 4.9395e-03, PNorm = 147.5487, GNorm = 0.1892, lr_0 = 3.3840e-04
Loss = 5.5364e-03, PNorm = 147.5614, GNorm = 0.3805, lr_0 = 3.3817e-04
Loss = 6.1158e-03, PNorm = 147.5704, GNorm = 0.0720, lr_0 = 3.3794e-04
Loss = 5.5352e-03, PNorm = 147.5800, GNorm = 0.1189, lr_0 = 3.3771e-04
Loss = 5.0897e-03, PNorm = 147.5893, GNorm = 0.1335, lr_0 = 3.3748e-04
Loss = 4.7840e-03, PNorm = 147.6041, GNorm = 0.1940, lr_0 = 3.3725e-04
Loss = 8.1117e-03, PNorm = 147.6182, GNorm = 0.1941, lr_0 = 3.3701e-04
Loss = 5.9497e-03, PNorm = 147.6299, GNorm = 0.0763, lr_0 = 3.3678e-04
Loss = 5.0089e-03, PNorm = 147.6404, GNorm = 0.3279, lr_0 = 3.3655e-04
Loss = 5.3512e-03, PNorm = 147.6529, GNorm = 0.1308, lr_0 = 3.3632e-04
Loss = 5.7984e-03, PNorm = 147.6658, GNorm = 0.1979, lr_0 = 3.3609e-04
Loss = 4.9032e-03, PNorm = 147.6779, GNorm = 0.1669, lr_0 = 3.3586e-04
Loss = 4.3305e-03, PNorm = 147.6881, GNorm = 0.1404, lr_0 = 3.3563e-04
Loss = 4.8596e-03, PNorm = 147.6975, GNorm = 0.1053, lr_0 = 3.3540e-04
Loss = 4.9252e-03, PNorm = 147.7104, GNorm = 0.0714, lr_0 = 3.3517e-04
Loss = 4.3980e-03, PNorm = 147.7255, GNorm = 0.0806, lr_0 = 3.3494e-04
Loss = 5.1137e-03, PNorm = 147.7343, GNorm = 0.2254, lr_0 = 3.3471e-04
Loss = 4.3740e-03, PNorm = 147.7461, GNorm = 0.4380, lr_0 = 3.3448e-04
Loss = 6.4677e-03, PNorm = 147.7555, GNorm = 0.4981, lr_0 = 3.3425e-04
Loss = 5.1960e-03, PNorm = 147.7699, GNorm = 0.1993, lr_0 = 3.3403e-04
Loss = 4.6500e-03, PNorm = 147.7830, GNorm = 0.2603, lr_0 = 3.3380e-04
Loss = 5.8572e-03, PNorm = 147.7951, GNorm = 0.2341, lr_0 = 3.3357e-04
Loss = 5.1289e-03, PNorm = 147.8095, GNorm = 0.1716, lr_0 = 3.3334e-04
Loss = 5.3124e-03, PNorm = 147.8207, GNorm = 0.3070, lr_0 = 3.3311e-04
Loss = 3.9513e-03, PNorm = 147.8321, GNorm = 0.1451, lr_0 = 3.3288e-04
Loss = 4.6433e-03, PNorm = 147.8411, GNorm = 0.1254, lr_0 = 3.3265e-04
Loss = 4.0154e-03, PNorm = 147.8505, GNorm = 0.1147, lr_0 = 3.3243e-04
Loss = 5.2631e-03, PNorm = 147.8606, GNorm = 0.3745, lr_0 = 3.3220e-04
Loss = 6.1858e-03, PNorm = 147.8675, GNorm = 0.2928, lr_0 = 3.3197e-04
Loss = 4.1102e-03, PNorm = 147.8747, GNorm = 0.1187, lr_0 = 3.3174e-04
Loss = 7.0558e-03, PNorm = 147.8823, GNorm = 0.2558, lr_0 = 3.3152e-04
Loss = 4.2036e-03, PNorm = 147.8927, GNorm = 0.2297, lr_0 = 3.3129e-04
Loss = 6.6184e-03, PNorm = 147.9055, GNorm = 0.1897, lr_0 = 3.3106e-04
Loss = 5.0228e-03, PNorm = 147.9159, GNorm = 0.1891, lr_0 = 3.3084e-04
Loss = 5.3362e-03, PNorm = 147.9272, GNorm = 0.2096, lr_0 = 3.3061e-04
Loss = 4.6580e-03, PNorm = 147.9392, GNorm = 0.1826, lr_0 = 3.3038e-04
Loss = 7.4023e-03, PNorm = 147.9506, GNorm = 0.3710, lr_0 = 3.3016e-04
Loss = 4.1039e-03, PNorm = 147.9650, GNorm = 0.1970, lr_0 = 3.2993e-04
Loss = 5.9612e-03, PNorm = 147.9761, GNorm = 0.2107, lr_0 = 3.2970e-04
Loss = 5.5302e-03, PNorm = 147.9892, GNorm = 0.6017, lr_0 = 3.2948e-04
Loss = 4.4989e-03, PNorm = 148.0015, GNorm = 0.2352, lr_0 = 3.2925e-04
Loss = 5.4989e-03, PNorm = 148.0110, GNorm = 0.1394, lr_0 = 3.2903e-04
Loss = 4.6009e-03, PNorm = 148.0237, GNorm = 0.4656, lr_0 = 3.2880e-04
Loss = 4.9456e-03, PNorm = 148.0345, GNorm = 0.1718, lr_0 = 3.2858e-04
Loss = 5.4273e-03, PNorm = 148.0440, GNorm = 0.2292, lr_0 = 3.2835e-04
Loss = 4.9095e-03, PNorm = 148.0522, GNorm = 0.1294, lr_0 = 3.2813e-04
Loss = 5.8273e-03, PNorm = 148.0620, GNorm = 0.1873, lr_0 = 3.2790e-04
Loss = 4.6959e-03, PNorm = 148.0727, GNorm = 0.2020, lr_0 = 3.2768e-04
Loss = 4.2754e-03, PNorm = 148.0849, GNorm = 0.2189, lr_0 = 3.2745e-04
Loss = 7.1099e-03, PNorm = 148.0972, GNorm = 0.1919, lr_0 = 3.2723e-04
Loss = 8.2050e-03, PNorm = 148.1092, GNorm = 0.9295, lr_0 = 3.2700e-04
Loss = 4.4139e-03, PNorm = 148.1231, GNorm = 0.0977, lr_0 = 3.2678e-04
Loss = 4.0784e-03, PNorm = 148.1338, GNorm = 0.2126, lr_0 = 3.2656e-04
Loss = 5.3890e-03, PNorm = 148.1452, GNorm = 0.1367, lr_0 = 3.2633e-04
Loss = 4.5741e-03, PNorm = 148.1583, GNorm = 0.0901, lr_0 = 3.2611e-04
Loss = 7.0991e-03, PNorm = 148.1721, GNorm = 0.3688, lr_0 = 3.2589e-04
Loss = 4.3515e-03, PNorm = 148.1837, GNorm = 0.2185, lr_0 = 3.2566e-04
Loss = 5.3320e-03, PNorm = 148.1964, GNorm = 0.4115, lr_0 = 3.2544e-04
Loss = 4.8382e-03, PNorm = 148.2106, GNorm = 0.3311, lr_0 = 3.2522e-04
Loss = 4.7298e-03, PNorm = 148.2192, GNorm = 0.1880, lr_0 = 3.2499e-04
Loss = 4.0646e-03, PNorm = 148.2300, GNorm = 0.0999, lr_0 = 3.2477e-04
Loss = 6.8278e-03, PNorm = 148.2413, GNorm = 0.1024, lr_0 = 3.2455e-04
Loss = 4.5242e-03, PNorm = 148.2535, GNorm = 0.3672, lr_0 = 3.2433e-04
Loss = 4.6277e-03, PNorm = 148.2649, GNorm = 0.0892, lr_0 = 3.2410e-04
Loss = 4.4267e-03, PNorm = 148.2773, GNorm = 0.2037, lr_0 = 3.2388e-04
Loss = 4.6076e-03, PNorm = 148.2878, GNorm = 0.3024, lr_0 = 3.2366e-04
Loss = 4.8903e-03, PNorm = 148.3001, GNorm = 0.1434, lr_0 = 3.2344e-04
Loss = 5.2430e-03, PNorm = 148.3152, GNorm = 0.2245, lr_0 = 3.2322e-04
Loss = 5.8214e-03, PNorm = 148.3240, GNorm = 0.1811, lr_0 = 3.2300e-04
Loss = 4.7843e-03, PNorm = 148.3328, GNorm = 0.1698, lr_0 = 3.2277e-04
Loss = 5.0193e-03, PNorm = 148.3426, GNorm = 0.1826, lr_0 = 3.2255e-04
Loss = 6.6125e-03, PNorm = 148.3546, GNorm = 0.2594, lr_0 = 3.2233e-04
Loss = 5.0500e-03, PNorm = 148.3669, GNorm = 0.1174, lr_0 = 3.2211e-04
Loss = 4.7878e-03, PNorm = 148.3797, GNorm = 0.1418, lr_0 = 3.2189e-04
Loss = 5.1031e-03, PNorm = 148.3940, GNorm = 0.3249, lr_0 = 3.2167e-04
Loss = 7.6690e-03, PNorm = 148.4091, GNorm = 0.4182, lr_0 = 3.2145e-04
Loss = 4.7875e-03, PNorm = 148.4249, GNorm = 0.2437, lr_0 = 3.2123e-04
Loss = 6.0516e-03, PNorm = 148.4386, GNorm = 0.1413, lr_0 = 3.2101e-04
Loss = 5.6480e-03, PNorm = 148.4490, GNorm = 0.1631, lr_0 = 3.2079e-04
Loss = 5.2023e-03, PNorm = 148.4612, GNorm = 0.0711, lr_0 = 3.2057e-04
Loss = 6.6015e-03, PNorm = 148.4712, GNorm = 0.2316, lr_0 = 3.2035e-04
Loss = 5.8031e-03, PNorm = 148.4806, GNorm = 0.1099, lr_0 = 3.2013e-04
Loss = 5.7228e-03, PNorm = 148.4916, GNorm = 0.2492, lr_0 = 3.1991e-04
Loss = 6.7142e-03, PNorm = 148.5098, GNorm = 0.1676, lr_0 = 3.1969e-04
Loss = 6.3350e-03, PNorm = 148.5250, GNorm = 0.2498, lr_0 = 3.1947e-04
Loss = 4.6408e-03, PNorm = 148.5368, GNorm = 0.1060, lr_0 = 3.1925e-04
Loss = 6.4372e-03, PNorm = 148.5511, GNorm = 0.1353, lr_0 = 3.1904e-04
Loss = 6.1000e-03, PNorm = 148.5631, GNorm = 0.0988, lr_0 = 3.1882e-04
Loss = 5.2213e-03, PNorm = 148.5784, GNorm = 0.2308, lr_0 = 3.1860e-04
Loss = 5.9824e-03, PNorm = 148.5887, GNorm = 0.2332, lr_0 = 3.1838e-04
Loss = 4.5646e-03, PNorm = 148.5984, GNorm = 0.1272, lr_0 = 3.1816e-04
Loss = 5.9822e-03, PNorm = 148.6126, GNorm = 0.2603, lr_0 = 3.1794e-04
Loss = 5.6914e-03, PNorm = 148.6230, GNorm = 0.2218, lr_0 = 3.1773e-04
Loss = 6.8987e-03, PNorm = 148.6339, GNorm = 0.2330, lr_0 = 3.1751e-04
Loss = 4.6955e-03, PNorm = 148.6463, GNorm = 0.1159, lr_0 = 3.1729e-04
Loss = 4.3635e-03, PNorm = 148.6582, GNorm = 0.2565, lr_0 = 3.1707e-04
Loss = 5.0090e-03, PNorm = 148.6700, GNorm = 0.2287, lr_0 = 3.1686e-04
Loss = 4.3011e-03, PNorm = 148.6806, GNorm = 0.3422, lr_0 = 3.1664e-04
Loss = 3.8070e-03, PNorm = 148.6919, GNorm = 0.1103, lr_0 = 3.1642e-04
Loss = 4.8461e-03, PNorm = 148.7028, GNorm = 0.2128, lr_0 = 3.1621e-04
Validation mae = 0.477626
Epoch 16
Loss = 5.2181e-03, PNorm = 148.7117, GNorm = 0.2874, lr_0 = 3.1599e-04
Loss = 5.2595e-03, PNorm = 148.7202, GNorm = 0.1267, lr_0 = 3.1577e-04
Loss = 4.3370e-03, PNorm = 148.7273, GNorm = 0.1413, lr_0 = 3.1556e-04
Loss = 4.1099e-03, PNorm = 148.7340, GNorm = 0.2376, lr_0 = 3.1534e-04
Loss = 4.5681e-03, PNorm = 148.7420, GNorm = 0.0946, lr_0 = 3.1512e-04
Loss = 4.5977e-03, PNorm = 148.7489, GNorm = 0.1476, lr_0 = 3.1491e-04
Loss = 4.4521e-03, PNorm = 148.7598, GNorm = 0.1981, lr_0 = 3.1469e-04
Loss = 5.8245e-03, PNorm = 148.7672, GNorm = 0.1047, lr_0 = 3.1448e-04
Loss = 4.6497e-03, PNorm = 148.7750, GNorm = 0.2158, lr_0 = 3.1426e-04
Loss = 4.5948e-03, PNorm = 148.7836, GNorm = 0.2426, lr_0 = 3.1405e-04
Loss = 4.4623e-03, PNorm = 148.7932, GNorm = 0.1838, lr_0 = 3.1383e-04
Loss = 4.0931e-03, PNorm = 148.8004, GNorm = 0.2273, lr_0 = 3.1362e-04
Loss = 4.5239e-03, PNorm = 148.8091, GNorm = 0.2077, lr_0 = 3.1340e-04
Loss = 3.6794e-03, PNorm = 148.8164, GNorm = 0.2349, lr_0 = 3.1319e-04
Loss = 3.9668e-03, PNorm = 148.8285, GNorm = 0.1617, lr_0 = 3.1297e-04
Loss = 5.0533e-03, PNorm = 148.8379, GNorm = 0.2802, lr_0 = 3.1276e-04
Loss = 5.1337e-03, PNorm = 148.8465, GNorm = 0.2976, lr_0 = 3.1254e-04
Loss = 4.0393e-03, PNorm = 148.8533, GNorm = 0.2625, lr_0 = 3.1233e-04
Loss = 3.4896e-03, PNorm = 148.8608, GNorm = 0.2597, lr_0 = 3.1212e-04
Loss = 4.7921e-03, PNorm = 148.8685, GNorm = 0.0829, lr_0 = 3.1190e-04
Loss = 4.4236e-03, PNorm = 148.8771, GNorm = 0.3905, lr_0 = 3.1169e-04
Loss = 4.1841e-03, PNorm = 148.8897, GNorm = 0.0796, lr_0 = 3.1147e-04
Loss = 4.6905e-03, PNorm = 148.8974, GNorm = 0.1837, lr_0 = 3.1126e-04
Loss = 5.8783e-03, PNorm = 148.9060, GNorm = 0.2154, lr_0 = 3.1105e-04
Loss = 4.9788e-03, PNorm = 148.9158, GNorm = 0.2591, lr_0 = 3.1083e-04
Loss = 5.6358e-03, PNorm = 148.9239, GNorm = 0.2502, lr_0 = 3.1062e-04
Loss = 4.4397e-03, PNorm = 148.9330, GNorm = 0.2829, lr_0 = 3.1041e-04
Loss = 4.0924e-03, PNorm = 148.9416, GNorm = 0.1315, lr_0 = 3.1020e-04
Loss = 3.7513e-03, PNorm = 148.9516, GNorm = 0.2455, lr_0 = 3.0998e-04
Loss = 3.6070e-03, PNorm = 148.9622, GNorm = 0.2047, lr_0 = 3.0977e-04
Loss = 4.0128e-03, PNorm = 148.9699, GNorm = 0.2515, lr_0 = 3.0956e-04
Loss = 4.3848e-03, PNorm = 148.9802, GNorm = 0.1160, lr_0 = 3.0935e-04
Loss = 3.9933e-03, PNorm = 148.9887, GNorm = 0.2979, lr_0 = 3.0914e-04
Loss = 3.8384e-03, PNorm = 148.9999, GNorm = 0.1121, lr_0 = 3.0892e-04
Loss = 4.2032e-03, PNorm = 149.0079, GNorm = 0.1331, lr_0 = 3.0871e-04
Loss = 3.9572e-03, PNorm = 149.0137, GNorm = 0.1153, lr_0 = 3.0850e-04
Loss = 4.3650e-03, PNorm = 149.0232, GNorm = 0.1613, lr_0 = 3.0829e-04
Loss = 5.2200e-03, PNorm = 149.0326, GNorm = 0.3477, lr_0 = 3.0808e-04
Loss = 3.0598e-03, PNorm = 149.0436, GNorm = 0.0640, lr_0 = 3.0787e-04
Loss = 4.8719e-03, PNorm = 149.0539, GNorm = 0.3482, lr_0 = 3.0766e-04
Loss = 5.2157e-03, PNorm = 149.0599, GNorm = 0.3092, lr_0 = 3.0745e-04
Loss = 5.3312e-03, PNorm = 149.0688, GNorm = 0.3246, lr_0 = 3.0723e-04
Loss = 4.3692e-03, PNorm = 149.0798, GNorm = 0.1844, lr_0 = 3.0702e-04
Loss = 5.5034e-03, PNorm = 149.0864, GNorm = 0.0881, lr_0 = 3.0681e-04
Loss = 3.9828e-03, PNorm = 149.0951, GNorm = 0.0719, lr_0 = 3.0660e-04
Loss = 4.1470e-03, PNorm = 149.1000, GNorm = 0.1816, lr_0 = 3.0639e-04
Loss = 3.5016e-03, PNorm = 149.1049, GNorm = 0.0848, lr_0 = 3.0618e-04
Loss = 4.4227e-03, PNorm = 149.1142, GNorm = 0.2625, lr_0 = 3.0597e-04
Loss = 3.8972e-03, PNorm = 149.1224, GNorm = 0.2820, lr_0 = 3.0576e-04
Loss = 4.4412e-03, PNorm = 149.1337, GNorm = 0.1120, lr_0 = 3.0555e-04
Loss = 3.3101e-03, PNorm = 149.1441, GNorm = 0.1081, lr_0 = 3.0535e-04
Loss = 3.6866e-03, PNorm = 149.1548, GNorm = 0.2104, lr_0 = 3.0514e-04
Loss = 4.2986e-03, PNorm = 149.1605, GNorm = 0.1034, lr_0 = 3.0493e-04
Loss = 5.9031e-03, PNorm = 149.1637, GNorm = 0.2063, lr_0 = 3.0472e-04
Loss = 5.8201e-03, PNorm = 149.1703, GNorm = 0.1685, lr_0 = 3.0451e-04
Loss = 3.6393e-03, PNorm = 149.1807, GNorm = 0.1762, lr_0 = 3.0430e-04
Loss = 4.7631e-03, PNorm = 149.1941, GNorm = 0.3689, lr_0 = 3.0409e-04
Loss = 4.5223e-03, PNorm = 149.2088, GNorm = 0.1018, lr_0 = 3.0388e-04
Loss = 3.6545e-03, PNorm = 149.2227, GNorm = 0.1499, lr_0 = 3.0368e-04
Loss = 4.7735e-03, PNorm = 149.2313, GNorm = 0.1541, lr_0 = 3.0347e-04
Loss = 5.0278e-03, PNorm = 149.2407, GNorm = 0.5040, lr_0 = 3.0326e-04
Loss = 4.0591e-03, PNorm = 149.2534, GNorm = 0.2867, lr_0 = 3.0305e-04
Loss = 4.4643e-03, PNorm = 149.2622, GNorm = 0.1261, lr_0 = 3.0284e-04
Loss = 4.6952e-03, PNorm = 149.2730, GNorm = 0.3870, lr_0 = 3.0264e-04
Loss = 3.8253e-03, PNorm = 149.2837, GNorm = 0.0567, lr_0 = 3.0243e-04
Loss = 4.3238e-03, PNorm = 149.2928, GNorm = 0.0954, lr_0 = 3.0222e-04
Loss = 7.3460e-03, PNorm = 149.3015, GNorm = 0.1830, lr_0 = 3.0202e-04
Loss = 3.8567e-03, PNorm = 149.3126, GNorm = 0.2116, lr_0 = 3.0181e-04
Loss = 4.6638e-03, PNorm = 149.3248, GNorm = 0.1603, lr_0 = 3.0160e-04
Loss = 3.8564e-03, PNorm = 149.3348, GNorm = 0.2089, lr_0 = 3.0140e-04
Loss = 6.1673e-03, PNorm = 149.3415, GNorm = 0.2107, lr_0 = 3.0119e-04
Loss = 5.2525e-03, PNorm = 149.3481, GNorm = 0.1214, lr_0 = 3.0098e-04
Loss = 6.9427e-03, PNorm = 149.3552, GNorm = 0.1878, lr_0 = 3.0078e-04
Loss = 3.2666e-03, PNorm = 149.3671, GNorm = 0.2528, lr_0 = 3.0057e-04
Loss = 3.7305e-03, PNorm = 149.3796, GNorm = 0.2724, lr_0 = 3.0036e-04
Loss = 5.8572e-03, PNorm = 149.3891, GNorm = 0.2238, lr_0 = 3.0016e-04
Loss = 4.8877e-03, PNorm = 149.3976, GNorm = 0.4116, lr_0 = 2.9995e-04
Loss = 4.0849e-03, PNorm = 149.4080, GNorm = 0.1315, lr_0 = 2.9975e-04
Loss = 3.9526e-03, PNorm = 149.4208, GNorm = 0.3693, lr_0 = 2.9954e-04
Loss = 4.7386e-03, PNorm = 149.4405, GNorm = 0.0678, lr_0 = 2.9934e-04
Loss = 4.2877e-03, PNorm = 149.4539, GNorm = 0.2821, lr_0 = 2.9913e-04
Loss = 3.8840e-03, PNorm = 149.4645, GNorm = 0.0744, lr_0 = 2.9893e-04
Loss = 3.4314e-03, PNorm = 149.4722, GNorm = 0.1138, lr_0 = 2.9872e-04
Loss = 4.6815e-03, PNorm = 149.4834, GNorm = 0.2090, lr_0 = 2.9852e-04
Loss = 5.1579e-03, PNorm = 149.4929, GNorm = 0.1523, lr_0 = 2.9831e-04
Loss = 4.6507e-03, PNorm = 149.5036, GNorm = 0.1636, lr_0 = 2.9811e-04
Loss = 4.3931e-03, PNorm = 149.5126, GNorm = 0.2357, lr_0 = 2.9790e-04
Loss = 5.8364e-03, PNorm = 149.5186, GNorm = 0.0847, lr_0 = 2.9770e-04
Loss = 6.6101e-03, PNorm = 149.5285, GNorm = 0.2505, lr_0 = 2.9750e-04
Loss = 4.1140e-03, PNorm = 149.5395, GNorm = 0.1319, lr_0 = 2.9729e-04
Loss = 4.3301e-03, PNorm = 149.5502, GNorm = 0.3025, lr_0 = 2.9709e-04
Loss = 4.4778e-03, PNorm = 149.5609, GNorm = 0.0859, lr_0 = 2.9689e-04
Loss = 4.3180e-03, PNorm = 149.5711, GNorm = 0.2553, lr_0 = 2.9668e-04
Loss = 3.7629e-03, PNorm = 149.5774, GNorm = 0.1677, lr_0 = 2.9648e-04
Loss = 4.5259e-03, PNorm = 149.5846, GNorm = 0.2641, lr_0 = 2.9628e-04
Loss = 6.4514e-03, PNorm = 149.5937, GNorm = 0.3136, lr_0 = 2.9607e-04
Loss = 4.2531e-03, PNorm = 149.6048, GNorm = 0.2912, lr_0 = 2.9587e-04
Loss = 8.3321e-03, PNorm = 149.6160, GNorm = 0.1226, lr_0 = 2.9567e-04
Loss = 6.0272e-03, PNorm = 149.6267, GNorm = 0.3379, lr_0 = 2.9546e-04
Loss = 3.4082e-03, PNorm = 149.6363, GNorm = 0.0669, lr_0 = 2.9526e-04
Loss = 3.8594e-03, PNorm = 149.6462, GNorm = 0.1135, lr_0 = 2.9506e-04
Loss = 3.9250e-03, PNorm = 149.6591, GNorm = 0.2248, lr_0 = 2.9486e-04
Loss = 3.6209e-03, PNorm = 149.6679, GNorm = 0.2682, lr_0 = 2.9466e-04
Loss = 3.3296e-03, PNorm = 149.6771, GNorm = 0.2340, lr_0 = 2.9445e-04
Loss = 4.0227e-03, PNorm = 149.6854, GNorm = 0.0644, lr_0 = 2.9425e-04
Loss = 3.4644e-03, PNorm = 149.6943, GNorm = 0.1941, lr_0 = 2.9405e-04
Loss = 4.7496e-03, PNorm = 149.7000, GNorm = 0.1304, lr_0 = 2.9385e-04
Loss = 4.1297e-03, PNorm = 149.7077, GNorm = 0.1534, lr_0 = 2.9365e-04
Loss = 3.9709e-03, PNorm = 149.7187, GNorm = 0.1888, lr_0 = 2.9345e-04
Loss = 5.2125e-03, PNorm = 149.7330, GNorm = 0.2417, lr_0 = 2.9325e-04
Loss = 4.4947e-03, PNorm = 149.7431, GNorm = 0.2545, lr_0 = 2.9305e-04
Loss = 4.7366e-03, PNorm = 149.7567, GNorm = 0.1112, lr_0 = 2.9284e-04
Loss = 4.9979e-03, PNorm = 149.7690, GNorm = 0.1480, lr_0 = 2.9264e-04
Loss = 3.2975e-03, PNorm = 149.7779, GNorm = 0.2788, lr_0 = 2.9244e-04
Loss = 5.0129e-03, PNorm = 149.7866, GNorm = 0.1225, lr_0 = 2.9224e-04
Loss = 3.7717e-03, PNorm = 149.7993, GNorm = 0.2957, lr_0 = 2.9204e-04
Loss = 5.8236e-03, PNorm = 149.8055, GNorm = 0.1885, lr_0 = 2.9184e-04
Loss = 4.2502e-03, PNorm = 149.8165, GNorm = 0.2269, lr_0 = 2.9164e-04
Loss = 4.4327e-03, PNorm = 149.8287, GNorm = 0.1148, lr_0 = 2.9144e-04
Loss = 4.8091e-03, PNorm = 149.8408, GNorm = 0.1612, lr_0 = 2.9124e-04
Validation mae = 0.478363
Epoch 17
Loss = 3.1753e-03, PNorm = 149.8504, GNorm = 0.1960, lr_0 = 2.9104e-04
Loss = 4.3967e-03, PNorm = 149.8561, GNorm = 0.2471, lr_0 = 2.9084e-04
Loss = 3.0314e-03, PNorm = 149.8619, GNorm = 0.1281, lr_0 = 2.9065e-04
Loss = 3.8682e-03, PNorm = 149.8716, GNorm = 0.2426, lr_0 = 2.9045e-04
Loss = 4.1643e-03, PNorm = 149.8749, GNorm = 0.2688, lr_0 = 2.9025e-04
Loss = 3.1218e-03, PNorm = 149.8801, GNorm = 0.2282, lr_0 = 2.9005e-04
Loss = 3.0346e-03, PNorm = 149.8856, GNorm = 0.1292, lr_0 = 2.8985e-04
Loss = 3.9291e-03, PNorm = 149.8923, GNorm = 0.0865, lr_0 = 2.8965e-04
Loss = 4.0945e-03, PNorm = 149.9032, GNorm = 0.2825, lr_0 = 2.8945e-04
Loss = 3.4683e-03, PNorm = 149.9124, GNorm = 0.2469, lr_0 = 2.8925e-04
Loss = 3.3063e-03, PNorm = 149.9202, GNorm = 0.0771, lr_0 = 2.8906e-04
Loss = 3.3628e-03, PNorm = 149.9275, GNorm = 0.1113, lr_0 = 2.8886e-04
Loss = 4.3884e-03, PNorm = 149.9378, GNorm = 0.2178, lr_0 = 2.8866e-04
Loss = 3.9980e-03, PNorm = 149.9472, GNorm = 0.2239, lr_0 = 2.8846e-04
Loss = 4.3452e-03, PNorm = 149.9564, GNorm = 0.0791, lr_0 = 2.8826e-04
Loss = 3.7790e-03, PNorm = 149.9645, GNorm = 0.1330, lr_0 = 2.8807e-04
Loss = 3.9220e-03, PNorm = 149.9698, GNorm = 0.3462, lr_0 = 2.8787e-04
Loss = 3.0007e-03, PNorm = 149.9773, GNorm = 0.2656, lr_0 = 2.8767e-04
Loss = 4.1494e-03, PNorm = 149.9852, GNorm = 0.0567, lr_0 = 2.8748e-04
Loss = 3.6873e-03, PNorm = 149.9916, GNorm = 0.0629, lr_0 = 2.8728e-04
Loss = 3.1089e-03, PNorm = 149.9960, GNorm = 0.1825, lr_0 = 2.8708e-04
Loss = 3.5105e-03, PNorm = 150.0019, GNorm = 0.2612, lr_0 = 2.8689e-04
Loss = 3.8321e-03, PNorm = 150.0099, GNorm = 0.1586, lr_0 = 2.8669e-04
Loss = 3.8997e-03, PNorm = 150.0171, GNorm = 0.0849, lr_0 = 2.8649e-04
Loss = 3.5050e-03, PNorm = 150.0260, GNorm = 0.1680, lr_0 = 2.8630e-04
Loss = 5.4910e-03, PNorm = 150.0328, GNorm = 0.0939, lr_0 = 2.8610e-04
Loss = 3.5727e-03, PNorm = 150.0423, GNorm = 0.0727, lr_0 = 2.8590e-04
Loss = 5.1196e-03, PNorm = 150.0495, GNorm = 0.4356, lr_0 = 2.8571e-04
Loss = 3.0672e-03, PNorm = 150.0573, GNorm = 0.1836, lr_0 = 2.8551e-04
Loss = 4.3963e-03, PNorm = 150.0619, GNorm = 0.0762, lr_0 = 2.8532e-04
Loss = 5.9699e-03, PNorm = 150.0666, GNorm = 0.2109, lr_0 = 2.8512e-04
Loss = 3.9519e-03, PNorm = 150.0734, GNorm = 0.2430, lr_0 = 2.8493e-04
Loss = 5.4182e-03, PNorm = 150.0850, GNorm = 0.1247, lr_0 = 2.8473e-04
Loss = 4.7388e-03, PNorm = 150.0957, GNorm = 0.3216, lr_0 = 2.8454e-04
Loss = 2.9842e-03, PNorm = 150.1038, GNorm = 0.1004, lr_0 = 2.8434e-04
Loss = 4.3826e-03, PNorm = 150.1111, GNorm = 0.2096, lr_0 = 2.8415e-04
Loss = 3.4806e-03, PNorm = 150.1183, GNorm = 0.2817, lr_0 = 2.8395e-04
Loss = 3.2478e-03, PNorm = 150.1270, GNorm = 0.1555, lr_0 = 2.8376e-04
Loss = 3.2551e-03, PNorm = 150.1345, GNorm = 0.2785, lr_0 = 2.8356e-04
Loss = 3.9765e-03, PNorm = 150.1420, GNorm = 0.2010, lr_0 = 2.8337e-04
Loss = 3.5229e-03, PNorm = 150.1470, GNorm = 0.4853, lr_0 = 2.8317e-04
Loss = 5.7446e-03, PNorm = 150.1541, GNorm = 0.1679, lr_0 = 2.8298e-04
Loss = 4.8600e-03, PNorm = 150.1628, GNorm = 0.1665, lr_0 = 2.8279e-04
Loss = 5.8131e-03, PNorm = 150.1717, GNorm = 0.1638, lr_0 = 2.8259e-04
Loss = 3.2305e-03, PNorm = 150.1813, GNorm = 0.1264, lr_0 = 2.8240e-04
Loss = 3.4335e-03, PNorm = 150.1898, GNorm = 0.1841, lr_0 = 2.8221e-04
Loss = 4.0185e-03, PNorm = 150.1980, GNorm = 0.2448, lr_0 = 2.8201e-04
Loss = 4.3151e-03, PNorm = 150.2043, GNorm = 0.1018, lr_0 = 2.8182e-04
Loss = 4.1058e-03, PNorm = 150.2097, GNorm = 0.1717, lr_0 = 2.8163e-04
Loss = 4.0136e-03, PNorm = 150.2210, GNorm = 0.1391, lr_0 = 2.8143e-04
Loss = 3.7841e-03, PNorm = 150.2312, GNorm = 0.1802, lr_0 = 2.8124e-04
Loss = 3.4467e-03, PNorm = 150.2372, GNorm = 0.1748, lr_0 = 2.8105e-04
Loss = 4.0645e-03, PNorm = 150.2424, GNorm = 0.1661, lr_0 = 2.8085e-04
Loss = 4.3091e-03, PNorm = 150.2482, GNorm = 0.1555, lr_0 = 2.8066e-04
Loss = 3.5457e-03, PNorm = 150.2562, GNorm = 0.0857, lr_0 = 2.8047e-04
Loss = 3.4582e-03, PNorm = 150.2640, GNorm = 0.1028, lr_0 = 2.8028e-04
Loss = 5.4402e-03, PNorm = 150.2728, GNorm = 0.0891, lr_0 = 2.8009e-04
Loss = 3.1062e-03, PNorm = 150.2803, GNorm = 0.1128, lr_0 = 2.7989e-04
Loss = 2.8844e-03, PNorm = 150.2874, GNorm = 0.1620, lr_0 = 2.7970e-04
Loss = 5.1991e-03, PNorm = 150.2977, GNorm = 0.2470, lr_0 = 2.7951e-04
Loss = 6.2538e-03, PNorm = 150.3058, GNorm = 0.3404, lr_0 = 2.7932e-04
Loss = 3.0001e-03, PNorm = 150.3152, GNorm = 0.0645, lr_0 = 2.7913e-04
Loss = 3.6001e-03, PNorm = 150.3227, GNorm = 0.0888, lr_0 = 2.7894e-04
Loss = 5.6372e-03, PNorm = 150.3308, GNorm = 0.4035, lr_0 = 2.7875e-04
Loss = 3.3883e-03, PNorm = 150.3386, GNorm = 0.0634, lr_0 = 2.7855e-04
Loss = 3.9101e-03, PNorm = 150.3449, GNorm = 0.2026, lr_0 = 2.7836e-04
Loss = 4.4072e-03, PNorm = 150.3499, GNorm = 0.0857, lr_0 = 2.7817e-04
Loss = 3.9221e-03, PNorm = 150.3594, GNorm = 0.0428, lr_0 = 2.7798e-04
Loss = 3.9899e-03, PNorm = 150.3680, GNorm = 0.1087, lr_0 = 2.7779e-04
Loss = 3.3280e-03, PNorm = 150.3777, GNorm = 0.1945, lr_0 = 2.7760e-04
Loss = 3.2254e-03, PNorm = 150.3868, GNorm = 0.1610, lr_0 = 2.7741e-04
Loss = 3.1902e-03, PNorm = 150.3953, GNorm = 0.0881, lr_0 = 2.7722e-04
Loss = 3.7531e-03, PNorm = 150.4050, GNorm = 0.1815, lr_0 = 2.7703e-04
Loss = 4.0370e-03, PNorm = 150.4129, GNorm = 0.1539, lr_0 = 2.7684e-04
Loss = 2.9424e-03, PNorm = 150.4220, GNorm = 0.0739, lr_0 = 2.7665e-04
Loss = 4.1822e-03, PNorm = 150.4285, GNorm = 0.0901, lr_0 = 2.7646e-04
Loss = 3.9109e-03, PNorm = 150.4382, GNorm = 0.1002, lr_0 = 2.7627e-04
Loss = 3.6331e-03, PNorm = 150.4465, GNorm = 0.1652, lr_0 = 2.7608e-04
Loss = 3.6057e-03, PNorm = 150.4511, GNorm = 0.1927, lr_0 = 2.7590e-04
Loss = 3.8425e-03, PNorm = 150.4603, GNorm = 0.2147, lr_0 = 2.7571e-04
Loss = 3.5946e-03, PNorm = 150.4719, GNorm = 0.2768, lr_0 = 2.7552e-04
Loss = 4.7724e-03, PNorm = 150.4840, GNorm = 0.3306, lr_0 = 2.7533e-04
Loss = 2.9011e-03, PNorm = 150.4918, GNorm = 0.1087, lr_0 = 2.7514e-04
Loss = 3.0858e-03, PNorm = 150.4992, GNorm = 0.1113, lr_0 = 2.7495e-04
Loss = 3.1126e-03, PNorm = 150.5060, GNorm = 0.0734, lr_0 = 2.7476e-04
Loss = 3.3308e-03, PNorm = 150.5131, GNorm = 0.1335, lr_0 = 2.7457e-04
Loss = 3.2452e-03, PNorm = 150.5191, GNorm = 0.0478, lr_0 = 2.7439e-04
Loss = 3.2300e-03, PNorm = 150.5265, GNorm = 0.2557, lr_0 = 2.7420e-04
Loss = 2.8675e-03, PNorm = 150.5350, GNorm = 0.1282, lr_0 = 2.7401e-04
Loss = 3.5372e-03, PNorm = 150.5423, GNorm = 0.0846, lr_0 = 2.7382e-04
Loss = 3.8229e-03, PNorm = 150.5464, GNorm = 0.2466, lr_0 = 2.7364e-04
Loss = 3.7147e-03, PNorm = 150.5538, GNorm = 0.1090, lr_0 = 2.7345e-04
Loss = 3.6756e-03, PNorm = 150.5646, GNorm = 0.1360, lr_0 = 2.7326e-04
Loss = 3.8798e-03, PNorm = 150.5756, GNorm = 0.3147, lr_0 = 2.7307e-04
Loss = 6.0699e-03, PNorm = 150.5805, GNorm = 0.2400, lr_0 = 2.7289e-04
Loss = 4.6863e-03, PNorm = 150.5874, GNorm = 0.0542, lr_0 = 2.7270e-04
Loss = 3.6307e-03, PNorm = 150.5958, GNorm = 0.2463, lr_0 = 2.7251e-04
Loss = 3.5106e-03, PNorm = 150.6051, GNorm = 0.0746, lr_0 = 2.7233e-04
Loss = 3.6895e-03, PNorm = 150.6113, GNorm = 0.1669, lr_0 = 2.7214e-04
Loss = 4.1033e-03, PNorm = 150.6198, GNorm = 0.2681, lr_0 = 2.7195e-04
Loss = 4.6198e-03, PNorm = 150.6304, GNorm = 0.1865, lr_0 = 2.7177e-04
Loss = 3.0039e-03, PNorm = 150.6396, GNorm = 0.1482, lr_0 = 2.7158e-04
Loss = 3.3642e-03, PNorm = 150.6491, GNorm = 0.1925, lr_0 = 2.7139e-04
Loss = 4.1828e-03, PNorm = 150.6582, GNorm = 0.3888, lr_0 = 2.7121e-04
Loss = 3.2160e-03, PNorm = 150.6665, GNorm = 0.2121, lr_0 = 2.7102e-04
Loss = 3.3212e-03, PNorm = 150.6757, GNorm = 0.1571, lr_0 = 2.7084e-04
Loss = 3.2188e-03, PNorm = 150.6875, GNorm = 0.1876, lr_0 = 2.7065e-04
Loss = 4.2018e-03, PNorm = 150.6988, GNorm = 0.2381, lr_0 = 2.7047e-04
Loss = 5.5902e-03, PNorm = 150.7069, GNorm = 0.0539, lr_0 = 2.7028e-04
Loss = 4.0496e-03, PNorm = 150.7158, GNorm = 0.1300, lr_0 = 2.7010e-04
Loss = 3.9274e-03, PNorm = 150.7231, GNorm = 0.1485, lr_0 = 2.6991e-04
Loss = 7.6708e-03, PNorm = 150.7297, GNorm = 0.2704, lr_0 = 2.6973e-04
Loss = 7.0531e-03, PNorm = 150.7355, GNorm = 0.0806, lr_0 = 2.6954e-04
Loss = 4.2684e-03, PNorm = 150.7451, GNorm = 0.1602, lr_0 = 2.6936e-04
Loss = 3.6948e-03, PNorm = 150.7552, GNorm = 0.1819, lr_0 = 2.6917e-04
Loss = 4.2204e-03, PNorm = 150.7636, GNorm = 0.3440, lr_0 = 2.6899e-04
Loss = 3.2983e-03, PNorm = 150.7737, GNorm = 0.1666, lr_0 = 2.6880e-04
Loss = 2.9834e-03, PNorm = 150.7858, GNorm = 0.1968, lr_0 = 2.6862e-04
Loss = 4.2721e-03, PNorm = 150.7941, GNorm = 0.1922, lr_0 = 2.6844e-04
Loss = 3.9374e-03, PNorm = 150.8017, GNorm = 0.1495, lr_0 = 2.6825e-04
Validation mae = 0.478147
Epoch 18
Loss = 3.5056e-03, PNorm = 150.8089, GNorm = 0.0876, lr_0 = 2.6807e-04
Loss = 2.9600e-03, PNorm = 150.8136, GNorm = 0.1505, lr_0 = 2.6788e-04
Loss = 3.1244e-03, PNorm = 150.8204, GNorm = 0.0850, lr_0 = 2.6770e-04
Loss = 3.0387e-03, PNorm = 150.8295, GNorm = 0.0772, lr_0 = 2.6752e-04
Loss = 3.2742e-03, PNorm = 150.8366, GNorm = 0.1355, lr_0 = 2.6733e-04
Loss = 2.9370e-03, PNorm = 150.8413, GNorm = 0.1059, lr_0 = 2.6715e-04
Loss = 2.8849e-03, PNorm = 150.8455, GNorm = 0.0560, lr_0 = 2.6697e-04
Loss = 3.0350e-03, PNorm = 150.8529, GNorm = 0.1868, lr_0 = 2.6678e-04
Loss = 2.7923e-03, PNorm = 150.8576, GNorm = 0.0944, lr_0 = 2.6660e-04
Loss = 3.1098e-03, PNorm = 150.8616, GNorm = 0.0468, lr_0 = 2.6642e-04
Loss = 2.9798e-03, PNorm = 150.8695, GNorm = 0.1318, lr_0 = 2.6624e-04
Loss = 2.6795e-03, PNorm = 150.8776, GNorm = 0.1451, lr_0 = 2.6605e-04
Loss = 4.0066e-03, PNorm = 150.8849, GNorm = 0.2916, lr_0 = 2.6587e-04
Loss = 2.9528e-03, PNorm = 150.8919, GNorm = 0.0843, lr_0 = 2.6569e-04
Loss = 2.9769e-03, PNorm = 150.8953, GNorm = 0.2270, lr_0 = 2.6551e-04
Loss = 2.8928e-03, PNorm = 150.9014, GNorm = 0.1366, lr_0 = 2.6533e-04
Loss = 3.9572e-03, PNorm = 150.9101, GNorm = 0.1710, lr_0 = 2.6514e-04
Loss = 3.4831e-03, PNorm = 150.9177, GNorm = 0.0547, lr_0 = 2.6496e-04
Loss = 2.5564e-03, PNorm = 150.9235, GNorm = 0.1016, lr_0 = 2.6478e-04
Loss = 3.4664e-03, PNorm = 150.9297, GNorm = 0.0770, lr_0 = 2.6460e-04
Loss = 3.0873e-03, PNorm = 150.9373, GNorm = 0.1315, lr_0 = 2.6442e-04
Loss = 3.4065e-03, PNorm = 150.9458, GNorm = 0.0735, lr_0 = 2.6424e-04
Loss = 2.8830e-03, PNorm = 150.9542, GNorm = 0.0788, lr_0 = 2.6406e-04
Loss = 2.6152e-03, PNorm = 150.9624, GNorm = 0.1295, lr_0 = 2.6388e-04
Loss = 2.8052e-03, PNorm = 150.9708, GNorm = 0.1675, lr_0 = 2.6369e-04
Loss = 4.3332e-03, PNorm = 150.9738, GNorm = 0.2413, lr_0 = 2.6351e-04
Loss = 3.1095e-03, PNorm = 150.9784, GNorm = 0.2054, lr_0 = 2.6333e-04
Loss = 2.6822e-03, PNorm = 150.9819, GNorm = 0.0790, lr_0 = 2.6315e-04
Loss = 3.6467e-03, PNorm = 150.9897, GNorm = 0.1179, lr_0 = 2.6297e-04
Loss = 2.5680e-03, PNorm = 150.9967, GNorm = 0.0585, lr_0 = 2.6279e-04
Loss = 2.9270e-03, PNorm = 151.0016, GNorm = 0.0834, lr_0 = 2.6261e-04
Loss = 3.8682e-03, PNorm = 151.0097, GNorm = 0.0725, lr_0 = 2.6243e-04
Loss = 4.1895e-03, PNorm = 151.0184, GNorm = 0.1502, lr_0 = 2.6225e-04
Loss = 2.8016e-03, PNorm = 151.0239, GNorm = 0.1362, lr_0 = 2.6207e-04
Loss = 2.5166e-03, PNorm = 151.0310, GNorm = 0.0890, lr_0 = 2.6189e-04
Loss = 3.5414e-03, PNorm = 151.0364, GNorm = 0.3115, lr_0 = 2.6171e-04
Loss = 4.2076e-03, PNorm = 151.0392, GNorm = 0.2195, lr_0 = 2.6153e-04
Loss = 4.0071e-03, PNorm = 151.0428, GNorm = 0.0759, lr_0 = 2.6136e-04
Loss = 2.8812e-03, PNorm = 151.0475, GNorm = 0.0961, lr_0 = 2.6118e-04
Loss = 2.5319e-03, PNorm = 151.0541, GNorm = 0.1874, lr_0 = 2.6100e-04
Loss = 3.7672e-03, PNorm = 151.0611, GNorm = 0.0903, lr_0 = 2.6082e-04
Loss = 2.7646e-03, PNorm = 151.0664, GNorm = 0.1608, lr_0 = 2.6064e-04
Loss = 3.1640e-03, PNorm = 151.0712, GNorm = 0.0579, lr_0 = 2.6046e-04
Loss = 3.1711e-03, PNorm = 151.0770, GNorm = 0.1248, lr_0 = 2.6028e-04
Loss = 2.6959e-03, PNorm = 151.0876, GNorm = 0.0990, lr_0 = 2.6011e-04
Loss = 3.3286e-03, PNorm = 151.0969, GNorm = 0.3073, lr_0 = 2.5993e-04
Loss = 6.6721e-03, PNorm = 151.1009, GNorm = 0.0727, lr_0 = 2.5975e-04
Loss = 2.9471e-03, PNorm = 151.1071, GNorm = 0.3674, lr_0 = 2.5957e-04
Loss = 3.4200e-03, PNorm = 151.1119, GNorm = 0.0664, lr_0 = 2.5939e-04
Loss = 2.8160e-03, PNorm = 151.1152, GNorm = 0.1422, lr_0 = 2.5922e-04
Loss = 2.9498e-03, PNorm = 151.1235, GNorm = 0.2311, lr_0 = 2.5904e-04
Loss = 2.6728e-03, PNorm = 151.1322, GNorm = 0.2847, lr_0 = 2.5886e-04
Loss = 3.8609e-03, PNorm = 151.1403, GNorm = 0.1407, lr_0 = 2.5868e-04
Loss = 3.5418e-03, PNorm = 151.1469, GNorm = 0.1533, lr_0 = 2.5851e-04
Loss = 3.6389e-03, PNorm = 151.1551, GNorm = 0.1720, lr_0 = 2.5833e-04
Loss = 3.3987e-03, PNorm = 151.1634, GNorm = 0.1739, lr_0 = 2.5815e-04
Loss = 3.6822e-03, PNorm = 151.1690, GNorm = 0.0560, lr_0 = 2.5797e-04
Loss = 3.5651e-03, PNorm = 151.1741, GNorm = 0.1674, lr_0 = 2.5780e-04
Loss = 4.4696e-03, PNorm = 151.1828, GNorm = 0.1221, lr_0 = 2.5762e-04
Loss = 3.5724e-03, PNorm = 151.1904, GNorm = 0.0825, lr_0 = 2.5745e-04
Loss = 3.9365e-03, PNorm = 151.1958, GNorm = 0.1908, lr_0 = 2.5727e-04
Loss = 6.4167e-03, PNorm = 151.2044, GNorm = 0.0825, lr_0 = 2.5709e-04
Loss = 2.7651e-03, PNorm = 151.2130, GNorm = 0.0601, lr_0 = 2.5692e-04
Loss = 3.1123e-03, PNorm = 151.2213, GNorm = 0.1825, lr_0 = 2.5674e-04
Loss = 3.7074e-03, PNorm = 151.2297, GNorm = 0.0763, lr_0 = 2.5656e-04
Loss = 3.0847e-03, PNorm = 151.2383, GNorm = 0.2011, lr_0 = 2.5639e-04
Loss = 4.3361e-03, PNorm = 151.2453, GNorm = 0.3614, lr_0 = 2.5621e-04
Loss = 2.8175e-03, PNorm = 151.2519, GNorm = 0.1390, lr_0 = 2.5604e-04
Loss = 3.2956e-03, PNorm = 151.2577, GNorm = 0.1449, lr_0 = 2.5586e-04
Loss = 4.0967e-03, PNorm = 151.2642, GNorm = 0.2164, lr_0 = 2.5569e-04
Loss = 2.7065e-03, PNorm = 151.2699, GNorm = 0.0961, lr_0 = 2.5551e-04
Loss = 2.7497e-03, PNorm = 151.2760, GNorm = 0.0900, lr_0 = 2.5534e-04
Loss = 3.6085e-03, PNorm = 151.2830, GNorm = 0.2666, lr_0 = 2.5516e-04
Loss = 2.5059e-03, PNorm = 151.2910, GNorm = 0.0731, lr_0 = 2.5499e-04
Loss = 4.2449e-03, PNorm = 151.2982, GNorm = 0.1629, lr_0 = 2.5481e-04
Loss = 4.6162e-03, PNorm = 151.3049, GNorm = 0.2528, lr_0 = 2.5464e-04
Loss = 4.4027e-03, PNorm = 151.3129, GNorm = 0.2823, lr_0 = 2.5446e-04
Loss = 3.1752e-03, PNorm = 151.3207, GNorm = 0.0561, lr_0 = 2.5429e-04
Loss = 3.3122e-03, PNorm = 151.3283, GNorm = 0.2350, lr_0 = 2.5411e-04
Loss = 5.6927e-03, PNorm = 151.3367, GNorm = 0.1012, lr_0 = 2.5394e-04
Loss = 2.9770e-03, PNorm = 151.3466, GNorm = 0.1606, lr_0 = 2.5377e-04
Loss = 3.6355e-03, PNorm = 151.3536, GNorm = 0.1453, lr_0 = 2.5359e-04
Loss = 2.7827e-03, PNorm = 151.3613, GNorm = 0.1128, lr_0 = 2.5342e-04
Loss = 4.9663e-03, PNorm = 151.3696, GNorm = 0.2391, lr_0 = 2.5325e-04
Loss = 3.7526e-03, PNorm = 151.3754, GNorm = 0.1615, lr_0 = 2.5307e-04
Loss = 3.0986e-03, PNorm = 151.3801, GNorm = 0.0972, lr_0 = 2.5290e-04
Loss = 3.5600e-03, PNorm = 151.3867, GNorm = 0.2012, lr_0 = 2.5273e-04
Loss = 3.4314e-03, PNorm = 151.3931, GNorm = 0.5415, lr_0 = 2.5255e-04
Loss = 2.6810e-03, PNorm = 151.3987, GNorm = 0.1285, lr_0 = 2.5238e-04
Loss = 2.2564e-03, PNorm = 151.4074, GNorm = 0.0756, lr_0 = 2.5221e-04
Loss = 4.7187e-03, PNorm = 151.4155, GNorm = 0.1577, lr_0 = 2.5203e-04
Loss = 2.5045e-03, PNorm = 151.4204, GNorm = 0.1599, lr_0 = 2.5186e-04
Loss = 2.4719e-03, PNorm = 151.4278, GNorm = 0.1745, lr_0 = 2.5169e-04
Loss = 4.1654e-03, PNorm = 151.4369, GNorm = 0.1768, lr_0 = 2.5152e-04
Loss = 2.9801e-03, PNorm = 151.4430, GNorm = 0.0984, lr_0 = 2.5134e-04
Loss = 3.1271e-03, PNorm = 151.4496, GNorm = 0.2104, lr_0 = 2.5117e-04
Loss = 3.2552e-03, PNorm = 151.4539, GNorm = 0.2425, lr_0 = 2.5100e-04
Loss = 2.3462e-03, PNorm = 151.4610, GNorm = 0.2034, lr_0 = 2.5083e-04
Loss = 4.8192e-03, PNorm = 151.4665, GNorm = 0.2455, lr_0 = 2.5066e-04
Loss = 5.1090e-03, PNorm = 151.4712, GNorm = 0.4845, lr_0 = 2.5048e-04
Loss = 3.3481e-03, PNorm = 151.4784, GNorm = 0.1489, lr_0 = 2.5031e-04
Loss = 4.6939e-03, PNorm = 151.4832, GNorm = 0.1190, lr_0 = 2.5014e-04
Loss = 3.6438e-03, PNorm = 151.4900, GNorm = 0.6043, lr_0 = 2.4997e-04
Loss = 5.0317e-03, PNorm = 151.4991, GNorm = 0.2531, lr_0 = 2.4980e-04
Loss = 2.8235e-03, PNorm = 151.5080, GNorm = 0.0575, lr_0 = 2.4963e-04
Loss = 3.7755e-03, PNorm = 151.5143, GNorm = 0.1130, lr_0 = 2.4946e-04
Loss = 3.3280e-03, PNorm = 151.5180, GNorm = 0.1515, lr_0 = 2.4929e-04
Loss = 3.5603e-03, PNorm = 151.5221, GNorm = 0.3711, lr_0 = 2.4911e-04
Loss = 3.0489e-03, PNorm = 151.5290, GNorm = 0.3313, lr_0 = 2.4894e-04
Loss = 2.9356e-03, PNorm = 151.5410, GNorm = 0.1088, lr_0 = 2.4877e-04
Loss = 4.5742e-03, PNorm = 151.5483, GNorm = 0.3092, lr_0 = 2.4860e-04
Loss = 3.2863e-03, PNorm = 151.5531, GNorm = 0.0684, lr_0 = 2.4843e-04
Loss = 6.0069e-03, PNorm = 151.5613, GNorm = 0.0960, lr_0 = 2.4826e-04
Loss = 6.1942e-03, PNorm = 151.5701, GNorm = 0.1611, lr_0 = 2.4809e-04
Loss = 3.8612e-03, PNorm = 151.5777, GNorm = 0.1591, lr_0 = 2.4792e-04
Loss = 2.8626e-03, PNorm = 151.5844, GNorm = 0.1036, lr_0 = 2.4775e-04
Loss = 4.0007e-03, PNorm = 151.5865, GNorm = 0.1075, lr_0 = 2.4758e-04
Loss = 2.4813e-03, PNorm = 151.5913, GNorm = 0.0745, lr_0 = 2.4741e-04
Loss = 3.5078e-03, PNorm = 151.5978, GNorm = 0.1343, lr_0 = 2.4724e-04
Loss = 3.5641e-03, PNorm = 151.6032, GNorm = 0.3124, lr_0 = 2.4707e-04
Validation mae = 0.475881
Epoch 19
Loss = 3.1153e-03, PNorm = 151.6089, GNorm = 0.1274, lr_0 = 2.4690e-04
Loss = 2.4777e-03, PNorm = 151.6140, GNorm = 0.0991, lr_0 = 2.4674e-04
Loss = 3.2980e-03, PNorm = 151.6172, GNorm = 0.1168, lr_0 = 2.4657e-04
Loss = 6.0098e-03, PNorm = 151.6219, GNorm = 0.1933, lr_0 = 2.4640e-04
Loss = 3.0944e-03, PNorm = 151.6259, GNorm = 0.1832, lr_0 = 2.4623e-04
Loss = 3.5005e-03, PNorm = 151.6325, GNorm = 0.1243, lr_0 = 2.4606e-04
Loss = 3.1115e-03, PNorm = 151.6427, GNorm = 0.1802, lr_0 = 2.4589e-04
Loss = 2.6668e-03, PNorm = 151.6496, GNorm = 0.1679, lr_0 = 2.4572e-04
Loss = 2.4822e-03, PNorm = 151.6554, GNorm = 0.1260, lr_0 = 2.4556e-04
Loss = 2.2307e-03, PNorm = 151.6590, GNorm = 0.1323, lr_0 = 2.4539e-04
Loss = 3.6022e-03, PNorm = 151.6633, GNorm = 0.1929, lr_0 = 2.4522e-04
Loss = 2.7619e-03, PNorm = 151.6707, GNorm = 0.3420, lr_0 = 2.4505e-04
Loss = 3.9507e-03, PNorm = 151.6755, GNorm = 0.2260, lr_0 = 2.4488e-04
Loss = 2.5714e-03, PNorm = 151.6798, GNorm = 0.3068, lr_0 = 2.4472e-04
Loss = 3.5939e-03, PNorm = 151.6840, GNorm = 0.0943, lr_0 = 2.4455e-04
Loss = 1.8174e-03, PNorm = 151.6862, GNorm = 0.0982, lr_0 = 2.4438e-04
Loss = 2.3250e-03, PNorm = 151.6907, GNorm = 0.1967, lr_0 = 2.4421e-04
Loss = 2.8573e-03, PNorm = 151.6944, GNorm = 0.0966, lr_0 = 2.4405e-04
Loss = 2.5306e-03, PNorm = 151.7011, GNorm = 0.0594, lr_0 = 2.4388e-04
Loss = 3.6770e-03, PNorm = 151.7069, GNorm = 0.1296, lr_0 = 2.4371e-04
Loss = 2.5295e-03, PNorm = 151.7121, GNorm = 0.1839, lr_0 = 2.4354e-04
Loss = 3.3229e-03, PNorm = 151.7174, GNorm = 0.1703, lr_0 = 2.4338e-04
Loss = 2.5771e-03, PNorm = 151.7230, GNorm = 0.0546, lr_0 = 2.4321e-04
Loss = 3.6642e-03, PNorm = 151.7289, GNorm = 0.2038, lr_0 = 2.4304e-04
Loss = 2.3289e-03, PNorm = 151.7348, GNorm = 0.1033, lr_0 = 2.4288e-04
Loss = 2.9441e-03, PNorm = 151.7394, GNorm = 0.2311, lr_0 = 2.4271e-04
Loss = 1.9746e-03, PNorm = 151.7449, GNorm = 0.1041, lr_0 = 2.4254e-04
Loss = 2.7266e-03, PNorm = 151.7507, GNorm = 0.2228, lr_0 = 2.4238e-04
Loss = 2.4786e-03, PNorm = 151.7559, GNorm = 0.1152, lr_0 = 2.4221e-04
Loss = 3.1501e-03, PNorm = 151.7584, GNorm = 0.1219, lr_0 = 2.4205e-04
Loss = 2.8656e-03, PNorm = 151.7642, GNorm = 0.0708, lr_0 = 2.4188e-04
Loss = 2.2328e-03, PNorm = 151.7686, GNorm = 0.2093, lr_0 = 2.4171e-04
Loss = 2.9606e-03, PNorm = 151.7760, GNorm = 0.1526, lr_0 = 2.4155e-04
Loss = 2.5106e-03, PNorm = 151.7830, GNorm = 0.2018, lr_0 = 2.4138e-04
Loss = 2.3547e-03, PNorm = 151.7884, GNorm = 0.2144, lr_0 = 2.4122e-04
Loss = 3.0107e-03, PNorm = 151.7929, GNorm = 0.3008, lr_0 = 2.4105e-04
Loss = 2.0401e-03, PNorm = 151.7958, GNorm = 0.0600, lr_0 = 2.4089e-04
Loss = 3.0764e-03, PNorm = 151.8011, GNorm = 0.2529, lr_0 = 2.4072e-04
Loss = 2.9871e-03, PNorm = 151.8077, GNorm = 0.1087, lr_0 = 2.4056e-04
Loss = 3.0184e-03, PNorm = 151.8119, GNorm = 0.1956, lr_0 = 2.4039e-04
Loss = 2.4894e-03, PNorm = 151.8184, GNorm = 0.1587, lr_0 = 2.4023e-04
Loss = 3.1811e-03, PNorm = 151.8280, GNorm = 0.1126, lr_0 = 2.4006e-04
Loss = 2.9318e-03, PNorm = 151.8362, GNorm = 0.1235, lr_0 = 2.3990e-04
Loss = 3.3173e-03, PNorm = 151.8433, GNorm = 0.0945, lr_0 = 2.3974e-04
Loss = 2.3732e-03, PNorm = 151.8484, GNorm = 0.1979, lr_0 = 2.3957e-04
Loss = 3.0808e-03, PNorm = 151.8531, GNorm = 0.1103, lr_0 = 2.3941e-04
Loss = 3.6212e-03, PNorm = 151.8573, GNorm = 0.0840, lr_0 = 2.3924e-04
Loss = 2.7065e-03, PNorm = 151.8631, GNorm = 0.1735, lr_0 = 2.3908e-04
Loss = 3.2778e-03, PNorm = 151.8687, GNorm = 0.0730, lr_0 = 2.3892e-04
Loss = 3.1165e-03, PNorm = 151.8760, GNorm = 0.0580, lr_0 = 2.3875e-04
Loss = 2.6034e-03, PNorm = 151.8836, GNorm = 0.1629, lr_0 = 2.3859e-04
Loss = 2.5010e-03, PNorm = 151.8885, GNorm = 0.2518, lr_0 = 2.3842e-04
Loss = 2.8734e-03, PNorm = 151.8947, GNorm = 0.0689, lr_0 = 2.3826e-04
Loss = 2.8111e-03, PNorm = 151.9016, GNorm = 0.1575, lr_0 = 2.3810e-04
Loss = 2.1283e-03, PNorm = 151.9064, GNorm = 0.0967, lr_0 = 2.3794e-04
Loss = 2.6640e-03, PNorm = 151.9097, GNorm = 0.1687, lr_0 = 2.3777e-04
Loss = 2.2901e-03, PNorm = 151.9132, GNorm = 0.0984, lr_0 = 2.3761e-04
Loss = 2.6434e-03, PNorm = 151.9160, GNorm = 0.0687, lr_0 = 2.3745e-04
Loss = 3.4441e-03, PNorm = 151.9219, GNorm = 0.1436, lr_0 = 2.3728e-04
Loss = 2.5544e-03, PNorm = 151.9279, GNorm = 0.0460, lr_0 = 2.3712e-04
Loss = 3.0245e-03, PNorm = 151.9314, GNorm = 0.1782, lr_0 = 2.3696e-04
Loss = 2.0655e-03, PNorm = 151.9399, GNorm = 0.0983, lr_0 = 2.3680e-04
Loss = 2.2444e-03, PNorm = 151.9484, GNorm = 0.2114, lr_0 = 2.3663e-04
Loss = 2.2563e-03, PNorm = 151.9554, GNorm = 0.1382, lr_0 = 2.3647e-04
Loss = 2.9805e-03, PNorm = 151.9605, GNorm = 0.1336, lr_0 = 2.3631e-04
Loss = 2.7182e-03, PNorm = 151.9647, GNorm = 0.2090, lr_0 = 2.3615e-04
Loss = 3.8195e-03, PNorm = 151.9693, GNorm = 0.2586, lr_0 = 2.3599e-04
Loss = 2.9073e-03, PNorm = 151.9748, GNorm = 0.0397, lr_0 = 2.3582e-04
Loss = 3.6078e-03, PNorm = 151.9812, GNorm = 0.2359, lr_0 = 2.3566e-04
Loss = 2.2136e-03, PNorm = 151.9851, GNorm = 0.1151, lr_0 = 2.3550e-04
Loss = 3.0502e-03, PNorm = 151.9906, GNorm = 0.1900, lr_0 = 2.3534e-04
Loss = 3.4310e-03, PNorm = 151.9953, GNorm = 0.2455, lr_0 = 2.3518e-04
Loss = 3.2323e-03, PNorm = 152.0030, GNorm = 0.2162, lr_0 = 2.3502e-04
Loss = 2.0631e-03, PNorm = 152.0094, GNorm = 0.0705, lr_0 = 2.3486e-04
Loss = 3.1972e-03, PNorm = 152.0166, GNorm = 0.1743, lr_0 = 2.3470e-04
Loss = 3.1104e-03, PNorm = 152.0257, GNorm = 0.1190, lr_0 = 2.3454e-04
Loss = 2.3558e-03, PNorm = 152.0337, GNorm = 0.1181, lr_0 = 2.3437e-04
Loss = 2.5012e-03, PNorm = 152.0413, GNorm = 0.0995, lr_0 = 2.3421e-04
Loss = 2.1721e-03, PNorm = 152.0444, GNorm = 0.2672, lr_0 = 2.3405e-04
Loss = 5.9215e-03, PNorm = 152.0493, GNorm = 0.1694, lr_0 = 2.3389e-04
Loss = 2.6468e-03, PNorm = 152.0573, GNorm = 0.0756, lr_0 = 2.3373e-04
Loss = 5.1480e-03, PNorm = 152.0616, GNorm = 0.6017, lr_0 = 2.3357e-04
Loss = 2.2238e-03, PNorm = 152.0677, GNorm = 0.0794, lr_0 = 2.3341e-04
Loss = 3.4882e-03, PNorm = 152.0771, GNorm = 0.0749, lr_0 = 2.3325e-04
Loss = 3.6091e-03, PNorm = 152.0830, GNorm = 0.3849, lr_0 = 2.3309e-04
Loss = 4.4018e-03, PNorm = 152.0904, GNorm = 0.2029, lr_0 = 2.3293e-04
Loss = 3.0943e-03, PNorm = 152.1003, GNorm = 0.2986, lr_0 = 2.3277e-04
Loss = 3.3680e-03, PNorm = 152.1068, GNorm = 0.5457, lr_0 = 2.3261e-04
Loss = 3.0019e-03, PNorm = 152.1145, GNorm = 0.2464, lr_0 = 2.3246e-04
Loss = 2.7733e-03, PNorm = 152.1228, GNorm = 0.2145, lr_0 = 2.3230e-04
Loss = 2.2589e-03, PNorm = 152.1284, GNorm = 0.1514, lr_0 = 2.3214e-04
Loss = 4.1002e-03, PNorm = 152.1324, GNorm = 0.0582, lr_0 = 2.3198e-04
Loss = 2.6036e-03, PNorm = 152.1371, GNorm = 0.1099, lr_0 = 2.3182e-04
Loss = 4.4675e-03, PNorm = 152.1442, GNorm = 0.3908, lr_0 = 2.3166e-04
Loss = 3.5003e-03, PNorm = 152.1506, GNorm = 0.1593, lr_0 = 2.3150e-04
Loss = 2.5692e-03, PNorm = 152.1561, GNorm = 0.2369, lr_0 = 2.3134e-04
Loss = 3.7553e-03, PNorm = 152.1609, GNorm = 0.1601, lr_0 = 2.3118e-04
Loss = 3.9301e-03, PNorm = 152.1663, GNorm = 0.2296, lr_0 = 2.3103e-04
Loss = 2.7842e-03, PNorm = 152.1724, GNorm = 0.0980, lr_0 = 2.3087e-04
Loss = 2.4520e-03, PNorm = 152.1784, GNorm = 0.0970, lr_0 = 2.3071e-04
Loss = 2.7450e-03, PNorm = 152.1852, GNorm = 0.0646, lr_0 = 2.3055e-04
Loss = 3.5652e-03, PNorm = 152.1917, GNorm = 0.1677, lr_0 = 2.3039e-04
Loss = 5.4531e-03, PNorm = 152.1981, GNorm = 0.0931, lr_0 = 2.3024e-04
Loss = 3.0293e-03, PNorm = 152.2029, GNorm = 0.2032, lr_0 = 2.3008e-04
Loss = 2.4435e-03, PNorm = 152.2125, GNorm = 0.3430, lr_0 = 2.2992e-04
Loss = 2.6302e-03, PNorm = 152.2165, GNorm = 0.1189, lr_0 = 2.2976e-04
Loss = 2.4476e-03, PNorm = 152.2207, GNorm = 0.2673, lr_0 = 2.2961e-04
Loss = 3.0943e-03, PNorm = 152.2286, GNorm = 0.0949, lr_0 = 2.2945e-04
Loss = 4.9872e-03, PNorm = 152.2382, GNorm = 0.0825, lr_0 = 2.2929e-04
Loss = 4.4670e-03, PNorm = 152.2482, GNorm = 0.3979, lr_0 = 2.2913e-04
Loss = 3.8945e-03, PNorm = 152.2565, GNorm = 0.2482, lr_0 = 2.2898e-04
Loss = 4.9365e-03, PNorm = 152.2628, GNorm = 0.0993, lr_0 = 2.2882e-04
Loss = 3.2560e-03, PNorm = 152.2698, GNorm = 0.0517, lr_0 = 2.2866e-04
Loss = 4.5996e-03, PNorm = 152.2741, GNorm = 0.0996, lr_0 = 2.2851e-04
Loss = 2.7534e-03, PNorm = 152.2789, GNorm = 0.1593, lr_0 = 2.2835e-04
Loss = 3.3588e-03, PNorm = 152.2836, GNorm = 0.1395, lr_0 = 2.2819e-04
Loss = 3.0681e-03, PNorm = 152.2893, GNorm = 0.1058, lr_0 = 2.2804e-04
Loss = 4.2252e-03, PNorm = 152.2972, GNorm = 0.1719, lr_0 = 2.2788e-04
Loss = 4.2324e-03, PNorm = 152.3030, GNorm = 0.1011, lr_0 = 2.2773e-04
Loss = 2.9396e-03, PNorm = 152.3049, GNorm = 0.0937, lr_0 = 2.2757e-04
Validation mae = 0.477382
Epoch 20
Loss = 3.6716e-03, PNorm = 152.3056, GNorm = 0.0993, lr_0 = 2.2741e-04
Loss = 2.8355e-03, PNorm = 152.3100, GNorm = 0.1453, lr_0 = 2.2726e-04
Loss = 2.7081e-03, PNorm = 152.3138, GNorm = 0.0733, lr_0 = 2.2710e-04
Loss = 3.4203e-03, PNorm = 152.3231, GNorm = 0.3975, lr_0 = 2.2695e-04
Loss = 2.7347e-03, PNorm = 152.3283, GNorm = 0.3057, lr_0 = 2.2679e-04
Loss = 2.7772e-03, PNorm = 152.3331, GNorm = 0.0572, lr_0 = 2.2664e-04
Loss = 2.6727e-03, PNorm = 152.3379, GNorm = 0.1305, lr_0 = 2.2648e-04
Loss = 4.3049e-03, PNorm = 152.3398, GNorm = 0.0698, lr_0 = 2.2632e-04
Loss = 3.8122e-03, PNorm = 152.3437, GNorm = 0.1953, lr_0 = 2.2617e-04
Loss = 2.4582e-03, PNorm = 152.3496, GNorm = 0.1961, lr_0 = 2.2601e-04
Loss = 2.5607e-03, PNorm = 152.3555, GNorm = 0.1273, lr_0 = 2.2586e-04
Loss = 2.2687e-03, PNorm = 152.3608, GNorm = 0.1579, lr_0 = 2.2571e-04
Loss = 3.7644e-03, PNorm = 152.3632, GNorm = 0.1026, lr_0 = 2.2555e-04
Loss = 2.6221e-03, PNorm = 152.3670, GNorm = 0.1991, lr_0 = 2.2540e-04
Loss = 2.8880e-03, PNorm = 152.3725, GNorm = 0.3090, lr_0 = 2.2524e-04
Loss = 1.9547e-03, PNorm = 152.3755, GNorm = 0.1822, lr_0 = 2.2509e-04
Loss = 2.8319e-03, PNorm = 152.3804, GNorm = 0.2050, lr_0 = 2.2493e-04
Loss = 4.8904e-03, PNorm = 152.3846, GNorm = 0.1594, lr_0 = 2.2478e-04
Loss = 2.4168e-03, PNorm = 152.3888, GNorm = 0.1542, lr_0 = 2.2463e-04
Loss = 2.8846e-03, PNorm = 152.3908, GNorm = 0.2822, lr_0 = 2.2447e-04
Loss = 4.6265e-03, PNorm = 152.3949, GNorm = 0.0570, lr_0 = 2.2432e-04
Loss = 2.4182e-03, PNorm = 152.3999, GNorm = 0.0397, lr_0 = 2.2416e-04
Loss = 2.3245e-03, PNorm = 152.4042, GNorm = 0.1883, lr_0 = 2.2401e-04
Loss = 1.9154e-03, PNorm = 152.4077, GNorm = 0.0554, lr_0 = 2.2386e-04
Loss = 3.8039e-03, PNorm = 152.4148, GNorm = 0.1115, lr_0 = 2.2370e-04
Loss = 3.6323e-03, PNorm = 152.4185, GNorm = 0.1036, lr_0 = 2.2355e-04
Loss = 3.3511e-03, PNorm = 152.4256, GNorm = 0.1718, lr_0 = 2.2340e-04
Loss = 2.7801e-03, PNorm = 152.4303, GNorm = 0.2191, lr_0 = 2.2324e-04
Loss = 2.4802e-03, PNorm = 152.4377, GNorm = 0.1684, lr_0 = 2.2309e-04
Loss = 2.0351e-03, PNorm = 152.4431, GNorm = 0.1586, lr_0 = 2.2294e-04
Loss = 3.3240e-03, PNorm = 152.4488, GNorm = 0.1627, lr_0 = 2.2279e-04
Loss = 2.1794e-03, PNorm = 152.4551, GNorm = 0.0592, lr_0 = 2.2263e-04
Loss = 2.0152e-03, PNorm = 152.4597, GNorm = 0.1047, lr_0 = 2.2248e-04
Loss = 3.5294e-03, PNorm = 152.4619, GNorm = 0.1594, lr_0 = 2.2233e-04
Loss = 2.3701e-03, PNorm = 152.4669, GNorm = 0.1895, lr_0 = 2.2218e-04
Loss = 1.8313e-03, PNorm = 152.4708, GNorm = 0.0612, lr_0 = 2.2202e-04
Loss = 4.3160e-03, PNorm = 152.4718, GNorm = 0.1133, lr_0 = 2.2187e-04
Loss = 3.3826e-03, PNorm = 152.4772, GNorm = 0.1749, lr_0 = 2.2172e-04
Loss = 2.6416e-03, PNorm = 152.4793, GNorm = 0.0532, lr_0 = 2.2157e-04
Loss = 2.0874e-03, PNorm = 152.4849, GNorm = 0.1029, lr_0 = 2.2142e-04
Loss = 3.1611e-03, PNorm = 152.4917, GNorm = 0.1823, lr_0 = 2.2126e-04
Loss = 2.0389e-03, PNorm = 152.4964, GNorm = 0.0656, lr_0 = 2.2111e-04
Loss = 1.8300e-03, PNorm = 152.5034, GNorm = 0.1073, lr_0 = 2.2096e-04
Loss = 2.3832e-03, PNorm = 152.5103, GNorm = 0.2116, lr_0 = 2.2081e-04
Loss = 1.9706e-03, PNorm = 152.5146, GNorm = 0.0807, lr_0 = 2.2066e-04
Loss = 2.8046e-03, PNorm = 152.5178, GNorm = 0.1626, lr_0 = 2.2051e-04
Loss = 2.9473e-03, PNorm = 152.5262, GNorm = 0.2710, lr_0 = 2.2036e-04
Loss = 2.3425e-03, PNorm = 152.5298, GNorm = 0.1789, lr_0 = 2.2021e-04
Loss = 2.4377e-03, PNorm = 152.5341, GNorm = 0.2463, lr_0 = 2.2005e-04
Loss = 2.2093e-03, PNorm = 152.5397, GNorm = 0.1478, lr_0 = 2.1990e-04
Loss = 3.5456e-03, PNorm = 152.5456, GNorm = 0.0409, lr_0 = 2.1975e-04
Loss = 1.6485e-03, PNorm = 152.5504, GNorm = 0.0539, lr_0 = 2.1960e-04
Loss = 2.4964e-03, PNorm = 152.5543, GNorm = 0.0787, lr_0 = 2.1945e-04
Loss = 1.8517e-03, PNorm = 152.5602, GNorm = 0.1021, lr_0 = 2.1930e-04
Loss = 4.7773e-03, PNorm = 152.5658, GNorm = 0.1787, lr_0 = 2.1915e-04
Loss = 2.1512e-03, PNorm = 152.5700, GNorm = 0.1852, lr_0 = 2.1900e-04
Loss = 2.3618e-03, PNorm = 152.5752, GNorm = 0.1398, lr_0 = 2.1885e-04
Loss = 1.9793e-03, PNorm = 152.5810, GNorm = 0.2952, lr_0 = 2.1870e-04
Loss = 1.9715e-03, PNorm = 152.5830, GNorm = 0.2126, lr_0 = 2.1855e-04
Loss = 1.8815e-03, PNorm = 152.5889, GNorm = 0.1151, lr_0 = 2.1840e-04
Loss = 2.5875e-03, PNorm = 152.5951, GNorm = 0.1534, lr_0 = 2.1825e-04
Loss = 2.2550e-03, PNorm = 152.6026, GNorm = 0.0566, lr_0 = 2.1810e-04
Loss = 2.5976e-03, PNorm = 152.6080, GNorm = 0.1208, lr_0 = 2.1795e-04
Loss = 2.2822e-03, PNorm = 152.6130, GNorm = 0.0539, lr_0 = 2.1780e-04
Loss = 2.4086e-03, PNorm = 152.6154, GNorm = 0.0999, lr_0 = 2.1765e-04
Loss = 1.9414e-03, PNorm = 152.6191, GNorm = 0.0775, lr_0 = 2.1751e-04
Loss = 3.5580e-03, PNorm = 152.6247, GNorm = 0.1776, lr_0 = 2.1736e-04
Loss = 2.0105e-03, PNorm = 152.6332, GNorm = 0.0664, lr_0 = 2.1721e-04
Loss = 2.0540e-03, PNorm = 152.6382, GNorm = 0.0651, lr_0 = 2.1706e-04
Loss = 1.7919e-03, PNorm = 152.6410, GNorm = 0.0462, lr_0 = 2.1691e-04
Loss = 1.6694e-03, PNorm = 152.6449, GNorm = 0.1275, lr_0 = 2.1676e-04
Loss = 3.1323e-03, PNorm = 152.6510, GNorm = 0.0928, lr_0 = 2.1661e-04
Loss = 1.8079e-03, PNorm = 152.6566, GNorm = 0.0468, lr_0 = 2.1646e-04
Loss = 2.4987e-03, PNorm = 152.6620, GNorm = 0.0900, lr_0 = 2.1632e-04
Loss = 2.1911e-03, PNorm = 152.6676, GNorm = 0.2140, lr_0 = 2.1617e-04
Loss = 2.7769e-03, PNorm = 152.6723, GNorm = 0.4550, lr_0 = 2.1602e-04
Loss = 5.2852e-03, PNorm = 152.6743, GNorm = 0.1886, lr_0 = 2.1587e-04
Loss = 8.0001e-03, PNorm = 152.6768, GNorm = 0.2028, lr_0 = 2.1572e-04
Loss = 2.3868e-03, PNorm = 152.6838, GNorm = 0.2152, lr_0 = 2.1558e-04
Loss = 3.1702e-03, PNorm = 152.6920, GNorm = 0.1896, lr_0 = 2.1543e-04
Loss = 3.1499e-03, PNorm = 152.6990, GNorm = 0.0724, lr_0 = 2.1528e-04
Loss = 2.1322e-03, PNorm = 152.7013, GNorm = 0.1759, lr_0 = 2.1513e-04
Loss = 2.0801e-03, PNorm = 152.7058, GNorm = 0.1690, lr_0 = 2.1499e-04
Loss = 2.6153e-03, PNorm = 152.7108, GNorm = 0.1557, lr_0 = 2.1484e-04
Loss = 2.4300e-03, PNorm = 152.7172, GNorm = 0.1183, lr_0 = 2.1469e-04
Loss = 2.4508e-03, PNorm = 152.7203, GNorm = 0.2039, lr_0 = 2.1454e-04
Loss = 1.8406e-03, PNorm = 152.7237, GNorm = 0.1625, lr_0 = 2.1440e-04
Loss = 2.0380e-03, PNorm = 152.7293, GNorm = 0.1870, lr_0 = 2.1425e-04
Loss = 4.0030e-03, PNorm = 152.7354, GNorm = 0.1016, lr_0 = 2.1410e-04
Loss = 2.2112e-03, PNorm = 152.7402, GNorm = 0.1022, lr_0 = 2.1396e-04
Loss = 4.6851e-03, PNorm = 152.7443, GNorm = 0.2853, lr_0 = 2.1381e-04
Loss = 1.7152e-03, PNorm = 152.7500, GNorm = 0.1654, lr_0 = 2.1366e-04
Loss = 2.8193e-03, PNorm = 152.7532, GNorm = 0.4420, lr_0 = 2.1352e-04
Loss = 2.5828e-03, PNorm = 152.7566, GNorm = 0.0768, lr_0 = 2.1337e-04
Loss = 3.7655e-03, PNorm = 152.7619, GNorm = 0.1427, lr_0 = 2.1323e-04
Loss = 3.3256e-03, PNorm = 152.7651, GNorm = 0.0505, lr_0 = 2.1308e-04
Loss = 2.4471e-03, PNorm = 152.7663, GNorm = 0.1642, lr_0 = 2.1293e-04
Loss = 2.5378e-03, PNorm = 152.7703, GNorm = 0.1542, lr_0 = 2.1279e-04
Loss = 2.5171e-03, PNorm = 152.7762, GNorm = 0.0714, lr_0 = 2.1264e-04
Loss = 2.7608e-03, PNorm = 152.7824, GNorm = 0.1882, lr_0 = 2.1250e-04
Loss = 1.6767e-03, PNorm = 152.7852, GNorm = 0.1308, lr_0 = 2.1235e-04
Loss = 2.1113e-03, PNorm = 152.7889, GNorm = 0.0443, lr_0 = 2.1221e-04
Loss = 1.7785e-03, PNorm = 152.7940, GNorm = 0.2003, lr_0 = 2.1206e-04
Loss = 2.1353e-03, PNorm = 152.7976, GNorm = 0.2551, lr_0 = 2.1191e-04
Loss = 1.8012e-03, PNorm = 152.7995, GNorm = 0.0735, lr_0 = 2.1177e-04
Loss = 2.6171e-03, PNorm = 152.8052, GNorm = 0.1389, lr_0 = 2.1162e-04
Loss = 2.0677e-03, PNorm = 152.8108, GNorm = 0.1741, lr_0 = 2.1148e-04
Loss = 1.8969e-03, PNorm = 152.8164, GNorm = 0.1638, lr_0 = 2.1133e-04
Loss = 2.7222e-03, PNorm = 152.8219, GNorm = 0.5817, lr_0 = 2.1119e-04
Loss = 2.6520e-03, PNorm = 152.8245, GNorm = 0.0734, lr_0 = 2.1104e-04
Loss = 2.0622e-03, PNorm = 152.8307, GNorm = 0.0888, lr_0 = 2.1090e-04
Loss = 3.7638e-03, PNorm = 152.8360, GNorm = 0.2191, lr_0 = 2.1076e-04
Loss = 2.9413e-03, PNorm = 152.8393, GNorm = 0.1238, lr_0 = 2.1061e-04
Loss = 3.1992e-03, PNorm = 152.8442, GNorm = 0.4631, lr_0 = 2.1047e-04
Loss = 2.6941e-03, PNorm = 152.8476, GNorm = 0.2298, lr_0 = 2.1032e-04
Loss = 2.9149e-03, PNorm = 152.8531, GNorm = 0.0814, lr_0 = 2.1018e-04
Loss = 3.4185e-03, PNorm = 152.8589, GNorm = 0.1965, lr_0 = 2.1003e-04
Loss = 2.5731e-03, PNorm = 152.8652, GNorm = 0.2989, lr_0 = 2.0989e-04
Loss = 2.1088e-03, PNorm = 152.8706, GNorm = 0.1116, lr_0 = 2.0975e-04
Loss = 2.0317e-03, PNorm = 152.8747, GNorm = 0.1159, lr_0 = 2.0960e-04
Validation mae = 0.476449
Epoch 21
Loss = 2.3919e-03, PNorm = 152.8779, GNorm = 0.0391, lr_0 = 2.0946e-04
Loss = 1.4755e-03, PNorm = 152.8803, GNorm = 0.3144, lr_0 = 2.0932e-04
Loss = 2.0705e-03, PNorm = 152.8840, GNorm = 0.1119, lr_0 = 2.0917e-04
Loss = 2.7980e-03, PNorm = 152.8864, GNorm = 0.2140, lr_0 = 2.0903e-04
Loss = 2.4200e-03, PNorm = 152.8894, GNorm = 0.1646, lr_0 = 2.0889e-04
Loss = 2.1586e-03, PNorm = 152.8948, GNorm = 0.1092, lr_0 = 2.0874e-04
Loss = 1.6476e-03, PNorm = 152.8998, GNorm = 0.2266, lr_0 = 2.0860e-04
Loss = 2.7690e-03, PNorm = 152.9059, GNorm = 0.0697, lr_0 = 2.0846e-04
Loss = 2.2806e-03, PNorm = 152.9116, GNorm = 0.0590, lr_0 = 2.0831e-04
Loss = 2.1561e-03, PNorm = 152.9167, GNorm = 0.1171, lr_0 = 2.0817e-04
Loss = 1.6655e-03, PNorm = 152.9209, GNorm = 0.0668, lr_0 = 2.0803e-04
Loss = 2.7864e-03, PNorm = 152.9239, GNorm = 0.1594, lr_0 = 2.0789e-04
Loss = 2.4163e-03, PNorm = 152.9266, GNorm = 0.0637, lr_0 = 2.0774e-04
Loss = 1.5628e-03, PNorm = 152.9295, GNorm = 0.1117, lr_0 = 2.0760e-04
Loss = 3.1521e-03, PNorm = 152.9317, GNorm = 0.1258, lr_0 = 2.0746e-04
Loss = 2.2951e-03, PNorm = 152.9336, GNorm = 0.0935, lr_0 = 2.0732e-04
Loss = 3.7932e-03, PNorm = 152.9361, GNorm = 0.0861, lr_0 = 2.0718e-04
Loss = 1.7502e-03, PNorm = 152.9394, GNorm = 0.0626, lr_0 = 2.0703e-04
Loss = 1.5881e-03, PNorm = 152.9422, GNorm = 0.0486, lr_0 = 2.0689e-04
Loss = 2.0747e-03, PNorm = 152.9466, GNorm = 0.0623, lr_0 = 2.0675e-04
Loss = 2.0306e-03, PNorm = 152.9490, GNorm = 0.2920, lr_0 = 2.0661e-04
Loss = 1.4621e-03, PNorm = 152.9524, GNorm = 0.1142, lr_0 = 2.0647e-04
Loss = 2.8330e-03, PNorm = 152.9553, GNorm = 0.1288, lr_0 = 2.0633e-04
Loss = 1.4755e-03, PNorm = 152.9596, GNorm = 0.1797, lr_0 = 2.0618e-04
Loss = 2.6979e-03, PNorm = 152.9638, GNorm = 0.0424, lr_0 = 2.0604e-04
Loss = 2.1097e-03, PNorm = 152.9660, GNorm = 0.0696, lr_0 = 2.0590e-04
Loss = 1.7437e-03, PNorm = 152.9689, GNorm = 0.0743, lr_0 = 2.0576e-04
Loss = 1.5735e-03, PNorm = 152.9698, GNorm = 0.0482, lr_0 = 2.0562e-04
Loss = 2.5168e-03, PNorm = 152.9724, GNorm = 0.1957, lr_0 = 2.0548e-04
Loss = 1.5978e-03, PNorm = 152.9750, GNorm = 0.1526, lr_0 = 2.0534e-04
Loss = 2.0140e-03, PNorm = 152.9796, GNorm = 0.0424, lr_0 = 2.0520e-04
Loss = 1.3498e-03, PNorm = 152.9840, GNorm = 0.2057, lr_0 = 2.0506e-04
Loss = 1.6638e-03, PNorm = 152.9891, GNorm = 0.0815, lr_0 = 2.0492e-04
Loss = 1.7019e-03, PNorm = 152.9927, GNorm = 0.0575, lr_0 = 2.0478e-04
Loss = 2.7162e-03, PNorm = 152.9962, GNorm = 0.3068, lr_0 = 2.0464e-04
Loss = 1.2605e-03, PNorm = 152.9987, GNorm = 0.0562, lr_0 = 2.0450e-04
Loss = 3.1738e-03, PNorm = 153.0030, GNorm = 0.2331, lr_0 = 2.0436e-04
Loss = 3.1130e-03, PNorm = 153.0065, GNorm = 0.0322, lr_0 = 2.0422e-04
Loss = 2.5755e-03, PNorm = 153.0098, GNorm = 0.1805, lr_0 = 2.0408e-04
Loss = 2.1932e-03, PNorm = 153.0143, GNorm = 0.1407, lr_0 = 2.0394e-04
Loss = 1.7963e-03, PNorm = 153.0180, GNorm = 0.0567, lr_0 = 2.0380e-04
Loss = 4.2861e-03, PNorm = 153.0212, GNorm = 0.0468, lr_0 = 2.0366e-04
Loss = 2.4735e-03, PNorm = 153.0262, GNorm = 0.1842, lr_0 = 2.0352e-04
Loss = 4.1848e-03, PNorm = 153.0299, GNorm = 0.1018, lr_0 = 2.0338e-04
Loss = 1.6306e-03, PNorm = 153.0348, GNorm = 0.0639, lr_0 = 2.0324e-04
Loss = 2.1962e-03, PNorm = 153.0398, GNorm = 0.0557, lr_0 = 2.0310e-04
Loss = 2.2729e-03, PNorm = 153.0430, GNorm = 0.0788, lr_0 = 2.0296e-04
Loss = 1.5136e-03, PNorm = 153.0466, GNorm = 0.0603, lr_0 = 2.0282e-04
Loss = 1.7791e-03, PNorm = 153.0486, GNorm = 0.1246, lr_0 = 2.0268e-04
Loss = 1.8815e-03, PNorm = 153.0532, GNorm = 0.0800, lr_0 = 2.0254e-04
Loss = 2.9270e-03, PNorm = 153.0592, GNorm = 0.0554, lr_0 = 2.0240e-04
Loss = 2.3811e-03, PNorm = 153.0626, GNorm = 0.1498, lr_0 = 2.0227e-04
Loss = 3.3789e-03, PNorm = 153.0661, GNorm = 0.0729, lr_0 = 2.0213e-04
Loss = 1.4027e-03, PNorm = 153.0689, GNorm = 0.1949, lr_0 = 2.0199e-04
Loss = 1.6632e-03, PNorm = 153.0722, GNorm = 0.1039, lr_0 = 2.0185e-04
Loss = 1.8103e-03, PNorm = 153.0752, GNorm = 0.1642, lr_0 = 2.0171e-04
Loss = 1.6253e-03, PNorm = 153.0788, GNorm = 0.0948, lr_0 = 2.0157e-04
Loss = 1.4333e-03, PNorm = 153.0827, GNorm = 0.2681, lr_0 = 2.0144e-04
Loss = 1.5759e-03, PNorm = 153.0868, GNorm = 0.1388, lr_0 = 2.0130e-04
Loss = 2.0329e-03, PNorm = 153.0896, GNorm = 0.1475, lr_0 = 2.0116e-04
Loss = 2.6433e-03, PNorm = 153.0934, GNorm = 0.4269, lr_0 = 2.0102e-04
Loss = 2.5741e-03, PNorm = 153.0975, GNorm = 0.1353, lr_0 = 2.0088e-04
Loss = 1.4692e-03, PNorm = 153.1019, GNorm = 0.0891, lr_0 = 2.0075e-04
Loss = 1.7946e-03, PNorm = 153.1052, GNorm = 0.0858, lr_0 = 2.0061e-04
Loss = 4.9610e-03, PNorm = 153.1089, GNorm = 0.1436, lr_0 = 2.0047e-04
Loss = 1.4733e-03, PNorm = 153.1126, GNorm = 0.1325, lr_0 = 2.0033e-04
Loss = 2.0498e-03, PNorm = 153.1178, GNorm = 0.1144, lr_0 = 2.0020e-04
Loss = 2.7007e-03, PNorm = 153.1216, GNorm = 0.0838, lr_0 = 2.0006e-04
Loss = 2.0482e-03, PNorm = 153.1251, GNorm = 0.0688, lr_0 = 1.9992e-04
Loss = 1.8220e-03, PNorm = 153.1296, GNorm = 0.0513, lr_0 = 1.9979e-04
Loss = 2.4897e-03, PNorm = 153.1334, GNorm = 0.0848, lr_0 = 1.9965e-04
Loss = 2.2001e-03, PNorm = 153.1371, GNorm = 0.0487, lr_0 = 1.9951e-04
Loss = 1.5589e-03, PNorm = 153.1416, GNorm = 0.1322, lr_0 = 1.9938e-04
Loss = 2.7665e-03, PNorm = 153.1463, GNorm = 0.0783, lr_0 = 1.9924e-04
Loss = 2.4381e-03, PNorm = 153.1496, GNorm = 0.1349, lr_0 = 1.9910e-04
Loss = 2.0802e-03, PNorm = 153.1543, GNorm = 0.1269, lr_0 = 1.9897e-04
Loss = 4.0041e-03, PNorm = 153.1600, GNorm = 0.1411, lr_0 = 1.9883e-04
Loss = 2.3318e-03, PNorm = 153.1641, GNorm = 0.0775, lr_0 = 1.9869e-04
Loss = 2.0613e-03, PNorm = 153.1684, GNorm = 0.0720, lr_0 = 1.9856e-04
Loss = 3.2569e-03, PNorm = 153.1729, GNorm = 0.1238, lr_0 = 1.9842e-04
Loss = 1.8525e-03, PNorm = 153.1762, GNorm = 0.0705, lr_0 = 1.9829e-04
Loss = 1.8467e-03, PNorm = 153.1813, GNorm = 0.0951, lr_0 = 1.9815e-04
Loss = 3.5351e-03, PNorm = 153.1876, GNorm = 0.1078, lr_0 = 1.9801e-04
Loss = 2.4370e-03, PNorm = 153.1932, GNorm = 0.0407, lr_0 = 1.9788e-04
Loss = 2.6328e-03, PNorm = 153.1977, GNorm = 0.0997, lr_0 = 1.9774e-04
Loss = 4.7315e-03, PNorm = 153.2013, GNorm = 0.1111, lr_0 = 1.9761e-04
Loss = 2.9598e-03, PNorm = 153.2036, GNorm = 0.0618, lr_0 = 1.9747e-04
Loss = 1.8022e-03, PNorm = 153.2055, GNorm = 0.1129, lr_0 = 1.9734e-04
Loss = 3.3063e-03, PNorm = 153.2111, GNorm = 0.0608, lr_0 = 1.9720e-04
Loss = 2.0973e-03, PNorm = 153.2173, GNorm = 0.1065, lr_0 = 1.9707e-04
Loss = 2.7575e-03, PNorm = 153.2227, GNorm = 0.0666, lr_0 = 1.9693e-04
Loss = 2.9735e-03, PNorm = 153.2269, GNorm = 0.0696, lr_0 = 1.9680e-04
Loss = 2.3138e-03, PNorm = 153.2303, GNorm = 0.0738, lr_0 = 1.9666e-04
Loss = 1.6455e-03, PNorm = 153.2347, GNorm = 0.0785, lr_0 = 1.9653e-04
Loss = 3.1617e-03, PNorm = 153.2395, GNorm = 0.1410, lr_0 = 1.9639e-04
Loss = 3.4135e-03, PNorm = 153.2427, GNorm = 0.0544, lr_0 = 1.9626e-04
Loss = 2.9416e-03, PNorm = 153.2440, GNorm = 0.1245, lr_0 = 1.9612e-04
Loss = 1.5716e-03, PNorm = 153.2482, GNorm = 0.1464, lr_0 = 1.9599e-04
Loss = 1.8593e-03, PNorm = 153.2524, GNorm = 0.1016, lr_0 = 1.9585e-04
Loss = 1.6617e-03, PNorm = 153.2580, GNorm = 0.1686, lr_0 = 1.9572e-04
Loss = 1.6183e-03, PNorm = 153.2630, GNorm = 0.1752, lr_0 = 1.9559e-04
Loss = 2.0112e-03, PNorm = 153.2681, GNorm = 0.0417, lr_0 = 1.9545e-04
Loss = 1.5404e-03, PNorm = 153.2698, GNorm = 0.0575, lr_0 = 1.9532e-04
Loss = 2.5158e-03, PNorm = 153.2713, GNorm = 0.1237, lr_0 = 1.9518e-04
Loss = 2.1719e-03, PNorm = 153.2746, GNorm = 0.1477, lr_0 = 1.9505e-04
Loss = 3.1147e-03, PNorm = 153.2793, GNorm = 0.1840, lr_0 = 1.9492e-04
Loss = 4.0483e-03, PNorm = 153.2843, GNorm = 0.2125, lr_0 = 1.9478e-04
Loss = 3.0327e-03, PNorm = 153.2866, GNorm = 0.1650, lr_0 = 1.9465e-04
Loss = 2.0023e-03, PNorm = 153.2928, GNorm = 0.1953, lr_0 = 1.9452e-04
Loss = 1.7124e-03, PNorm = 153.2979, GNorm = 0.1199, lr_0 = 1.9438e-04
Loss = 2.0499e-03, PNorm = 153.3028, GNorm = 0.1073, lr_0 = 1.9425e-04
Loss = 2.8408e-03, PNorm = 153.3081, GNorm = 0.0793, lr_0 = 1.9412e-04
Loss = 2.6779e-03, PNorm = 153.3141, GNorm = 0.0939, lr_0 = 1.9398e-04
Loss = 2.5759e-03, PNorm = 153.3167, GNorm = 0.2821, lr_0 = 1.9385e-04
Loss = 1.7469e-03, PNorm = 153.3195, GNorm = 0.1646, lr_0 = 1.9372e-04
Loss = 2.4596e-03, PNorm = 153.3223, GNorm = 0.2681, lr_0 = 1.9359e-04
Loss = 1.9983e-03, PNorm = 153.3265, GNorm = 0.1922, lr_0 = 1.9345e-04
Loss = 2.6482e-03, PNorm = 153.3311, GNorm = 0.2618, lr_0 = 1.9332e-04
Loss = 1.4490e-03, PNorm = 153.3348, GNorm = 0.2278, lr_0 = 1.9319e-04
Loss = 1.9507e-03, PNorm = 153.3393, GNorm = 0.0628, lr_0 = 1.9306e-04
Validation mae = 0.475803
Epoch 22
Loss = 1.6617e-03, PNorm = 153.3433, GNorm = 0.2641, lr_0 = 1.9292e-04
Loss = 2.0944e-03, PNorm = 153.3452, GNorm = 0.0452, lr_0 = 1.9279e-04
Loss = 1.4599e-03, PNorm = 153.3480, GNorm = 0.1498, lr_0 = 1.9266e-04
Loss = 2.2446e-03, PNorm = 153.3487, GNorm = 0.0529, lr_0 = 1.9253e-04
Loss = 4.4094e-03, PNorm = 153.3528, GNorm = 0.1960, lr_0 = 1.9240e-04
Loss = 1.6164e-03, PNorm = 153.3583, GNorm = 0.0769, lr_0 = 1.9226e-04
Loss = 1.3165e-03, PNorm = 153.3641, GNorm = 0.1107, lr_0 = 1.9213e-04
Loss = 1.6476e-03, PNorm = 153.3670, GNorm = 0.1373, lr_0 = 1.9200e-04
Loss = 3.0104e-03, PNorm = 153.3692, GNorm = 0.0734, lr_0 = 1.9187e-04
Loss = 3.5262e-03, PNorm = 153.3705, GNorm = 0.1008, lr_0 = 1.9174e-04
Loss = 2.1685e-03, PNorm = 153.3753, GNorm = 0.0696, lr_0 = 1.9161e-04
Loss = 3.1470e-03, PNorm = 153.3785, GNorm = 0.0659, lr_0 = 1.9148e-04
Loss = 2.3650e-03, PNorm = 153.3808, GNorm = 0.0454, lr_0 = 1.9134e-04
Loss = 2.1183e-03, PNorm = 153.3836, GNorm = 0.1626, lr_0 = 1.9121e-04
Loss = 1.8896e-03, PNorm = 153.3856, GNorm = 0.1393, lr_0 = 1.9108e-04
Loss = 1.9012e-03, PNorm = 153.3877, GNorm = 0.1471, lr_0 = 1.9095e-04
Loss = 1.4910e-03, PNorm = 153.3907, GNorm = 0.0604, lr_0 = 1.9082e-04
Loss = 2.7876e-03, PNorm = 153.3951, GNorm = 0.0405, lr_0 = 1.9069e-04
Loss = 2.7259e-03, PNorm = 153.3978, GNorm = 0.1932, lr_0 = 1.9056e-04
Loss = 1.5701e-03, PNorm = 153.3996, GNorm = 0.0608, lr_0 = 1.9043e-04
Loss = 1.7094e-03, PNorm = 153.4030, GNorm = 0.0342, lr_0 = 1.9030e-04
Loss = 1.8953e-03, PNorm = 153.4066, GNorm = 0.0678, lr_0 = 1.9017e-04
Loss = 1.2950e-03, PNorm = 153.4082, GNorm = 0.0592, lr_0 = 1.9004e-04
Loss = 1.2327e-03, PNorm = 153.4108, GNorm = 0.0984, lr_0 = 1.8991e-04
Loss = 1.5619e-03, PNorm = 153.4124, GNorm = 0.0995, lr_0 = 1.8978e-04
Loss = 1.6381e-03, PNorm = 153.4171, GNorm = 0.0720, lr_0 = 1.8965e-04
Loss = 1.1246e-03, PNorm = 153.4204, GNorm = 0.0396, lr_0 = 1.8952e-04
Loss = 1.6434e-03, PNorm = 153.4228, GNorm = 0.1252, lr_0 = 1.8939e-04
Loss = 2.7199e-03, PNorm = 153.4254, GNorm = 0.1754, lr_0 = 1.8926e-04
Loss = 1.4851e-03, PNorm = 153.4283, GNorm = 0.0549, lr_0 = 1.8913e-04
Loss = 1.3322e-03, PNorm = 153.4321, GNorm = 0.1053, lr_0 = 1.8900e-04
Loss = 3.8135e-03, PNorm = 153.4343, GNorm = 0.1680, lr_0 = 1.8887e-04
Loss = 1.7545e-03, PNorm = 153.4370, GNorm = 0.2605, lr_0 = 1.8874e-04
Loss = 1.7604e-03, PNorm = 153.4381, GNorm = 0.2011, lr_0 = 1.8861e-04
Loss = 1.2304e-03, PNorm = 153.4388, GNorm = 0.1630, lr_0 = 1.8848e-04
Loss = 1.5269e-03, PNorm = 153.4438, GNorm = 0.0919, lr_0 = 1.8835e-04
Loss = 1.8140e-03, PNorm = 153.4481, GNorm = 0.3657, lr_0 = 1.8822e-04
Loss = 1.4707e-03, PNorm = 153.4519, GNorm = 0.0713, lr_0 = 1.8809e-04
Loss = 1.5509e-03, PNorm = 153.4552, GNorm = 0.1179, lr_0 = 1.8797e-04
Loss = 2.5680e-03, PNorm = 153.4570, GNorm = 0.0636, lr_0 = 1.8784e-04
Loss = 1.4286e-03, PNorm = 153.4610, GNorm = 0.2206, lr_0 = 1.8771e-04
Loss = 1.2945e-03, PNorm = 153.4647, GNorm = 0.1781, lr_0 = 1.8758e-04
Loss = 1.2689e-03, PNorm = 153.4676, GNorm = 0.1078, lr_0 = 1.8745e-04
Loss = 1.7876e-03, PNorm = 153.4715, GNorm = 0.0949, lr_0 = 1.8732e-04
Loss = 1.3483e-03, PNorm = 153.4743, GNorm = 0.1339, lr_0 = 1.8719e-04
Loss = 1.6621e-03, PNorm = 153.4754, GNorm = 0.0475, lr_0 = 1.8707e-04
Loss = 1.3360e-03, PNorm = 153.4798, GNorm = 0.1392, lr_0 = 1.8694e-04
Loss = 2.2640e-03, PNorm = 153.4834, GNorm = 0.0738, lr_0 = 1.8681e-04
Loss = 1.4299e-03, PNorm = 153.4878, GNorm = 0.1948, lr_0 = 1.8668e-04
Loss = 1.8302e-03, PNorm = 153.4925, GNorm = 0.1758, lr_0 = 1.8655e-04
Loss = 1.4458e-03, PNorm = 153.4955, GNorm = 0.2695, lr_0 = 1.8643e-04
Loss = 4.3341e-03, PNorm = 153.5002, GNorm = 0.0577, lr_0 = 1.8630e-04
Loss = 1.2636e-03, PNorm = 153.5037, GNorm = 0.0581, lr_0 = 1.8617e-04
Loss = 2.3312e-03, PNorm = 153.5044, GNorm = 0.1011, lr_0 = 1.8604e-04
Loss = 2.1236e-03, PNorm = 153.5057, GNorm = 0.1159, lr_0 = 1.8592e-04
Loss = 1.7846e-03, PNorm = 153.5103, GNorm = 0.0491, lr_0 = 1.8579e-04
Loss = 2.5529e-03, PNorm = 153.5141, GNorm = 0.2514, lr_0 = 1.8566e-04
Loss = 2.3296e-03, PNorm = 153.5180, GNorm = 0.1663, lr_0 = 1.8553e-04
Loss = 1.3409e-03, PNorm = 153.5213, GNorm = 0.0754, lr_0 = 1.8541e-04
Loss = 1.3155e-03, PNorm = 153.5248, GNorm = 0.2642, lr_0 = 1.8528e-04
Loss = 1.9854e-03, PNorm = 153.5322, GNorm = 0.1643, lr_0 = 1.8515e-04
Loss = 1.5633e-03, PNorm = 153.5365, GNorm = 0.1098, lr_0 = 1.8503e-04
Loss = 1.8027e-03, PNorm = 153.5382, GNorm = 0.1212, lr_0 = 1.8490e-04
Loss = 1.7478e-03, PNorm = 153.5413, GNorm = 0.1542, lr_0 = 1.8477e-04
Loss = 1.7224e-03, PNorm = 153.5424, GNorm = 0.0484, lr_0 = 1.8465e-04
Loss = 3.7285e-03, PNorm = 153.5429, GNorm = 0.3155, lr_0 = 1.8452e-04
Loss = 2.5625e-03, PNorm = 153.5450, GNorm = 0.0346, lr_0 = 1.8439e-04
Loss = 2.1063e-03, PNorm = 153.5493, GNorm = 0.1302, lr_0 = 1.8427e-04
Loss = 1.8589e-03, PNorm = 153.5534, GNorm = 0.1071, lr_0 = 1.8414e-04
Loss = 2.0610e-03, PNorm = 153.5584, GNorm = 0.0314, lr_0 = 1.8401e-04
Loss = 1.4025e-03, PNorm = 153.5626, GNorm = 0.0567, lr_0 = 1.8389e-04
Loss = 2.7706e-03, PNorm = 153.5663, GNorm = 0.0801, lr_0 = 1.8376e-04
Loss = 4.6197e-03, PNorm = 153.5675, GNorm = 0.0367, lr_0 = 1.8364e-04
Loss = 1.8224e-03, PNorm = 153.5696, GNorm = 0.2228, lr_0 = 1.8351e-04
Loss = 1.4973e-03, PNorm = 153.5721, GNorm = 0.1463, lr_0 = 1.8338e-04
Loss = 2.2490e-03, PNorm = 153.5746, GNorm = 0.0399, lr_0 = 1.8326e-04
Loss = 1.2844e-03, PNorm = 153.5781, GNorm = 0.1486, lr_0 = 1.8313e-04
Loss = 2.1460e-03, PNorm = 153.5842, GNorm = 0.0635, lr_0 = 1.8301e-04
Loss = 2.6660e-03, PNorm = 153.5895, GNorm = 0.1971, lr_0 = 1.8288e-04
Loss = 1.7701e-03, PNorm = 153.5917, GNorm = 0.1176, lr_0 = 1.8276e-04
Loss = 1.5642e-03, PNorm = 153.5965, GNorm = 0.1147, lr_0 = 1.8263e-04
Loss = 1.6394e-03, PNorm = 153.6014, GNorm = 0.0939, lr_0 = 1.8251e-04
Loss = 1.4516e-03, PNorm = 153.6055, GNorm = 0.1576, lr_0 = 1.8238e-04
Loss = 1.4691e-03, PNorm = 153.6099, GNorm = 0.1450, lr_0 = 1.8226e-04
Loss = 1.8255e-03, PNorm = 153.6104, GNorm = 0.2986, lr_0 = 1.8213e-04
Loss = 1.3402e-03, PNorm = 153.6141, GNorm = 0.0587, lr_0 = 1.8201e-04
Loss = 1.5071e-03, PNorm = 153.6169, GNorm = 0.1163, lr_0 = 1.8188e-04
Loss = 2.0392e-03, PNorm = 153.6217, GNorm = 0.0525, lr_0 = 1.8176e-04
Loss = 2.1841e-03, PNorm = 153.6228, GNorm = 0.2163, lr_0 = 1.8163e-04
Loss = 3.4177e-03, PNorm = 153.6237, GNorm = 0.0382, lr_0 = 1.8151e-04
Loss = 1.3288e-03, PNorm = 153.6246, GNorm = 0.2130, lr_0 = 1.8138e-04
Loss = 4.3959e-03, PNorm = 153.6265, GNorm = 0.0878, lr_0 = 1.8126e-04
Loss = 2.0228e-03, PNorm = 153.6293, GNorm = 0.2383, lr_0 = 1.8114e-04
Loss = 2.8550e-03, PNorm = 153.6340, GNorm = 0.1927, lr_0 = 1.8101e-04
Loss = 1.8815e-03, PNorm = 153.6371, GNorm = 0.2293, lr_0 = 1.8089e-04
Loss = 1.6068e-03, PNorm = 153.6406, GNorm = 0.1751, lr_0 = 1.8076e-04
Loss = 2.1227e-03, PNorm = 153.6454, GNorm = 0.0961, lr_0 = 1.8064e-04
Loss = 1.6874e-03, PNorm = 153.6511, GNorm = 0.0751, lr_0 = 1.8052e-04
Loss = 3.4195e-03, PNorm = 153.6545, GNorm = 0.1293, lr_0 = 1.8039e-04
Loss = 2.0313e-03, PNorm = 153.6588, GNorm = 0.0619, lr_0 = 1.8027e-04
Loss = 3.0325e-03, PNorm = 153.6629, GNorm = 0.1058, lr_0 = 1.8015e-04
Loss = 1.4227e-03, PNorm = 153.6658, GNorm = 0.2112, lr_0 = 1.8002e-04
Loss = 2.1591e-03, PNorm = 153.6715, GNorm = 0.0339, lr_0 = 1.7990e-04
Loss = 5.0398e-03, PNorm = 153.6778, GNorm = 0.0413, lr_0 = 1.7978e-04
Loss = 2.7628e-03, PNorm = 153.6845, GNorm = 0.0640, lr_0 = 1.7965e-04
Loss = 2.6129e-03, PNorm = 153.6888, GNorm = 0.0952, lr_0 = 1.7953e-04
Loss = 3.5236e-03, PNorm = 153.6903, GNorm = 0.1767, lr_0 = 1.7941e-04
Loss = 1.6618e-03, PNorm = 153.6929, GNorm = 0.0979, lr_0 = 1.7928e-04
Loss = 2.0468e-03, PNorm = 153.6959, GNorm = 0.1087, lr_0 = 1.7916e-04
Loss = 1.8464e-03, PNorm = 153.7020, GNorm = 0.2091, lr_0 = 1.7904e-04
Loss = 1.8122e-03, PNorm = 153.7086, GNorm = 0.0820, lr_0 = 1.7892e-04
Loss = 2.7445e-03, PNorm = 153.7122, GNorm = 0.1535, lr_0 = 1.7879e-04
Loss = 2.6956e-03, PNorm = 153.7159, GNorm = 0.0824, lr_0 = 1.7867e-04
Loss = 1.3512e-03, PNorm = 153.7199, GNorm = 0.0373, lr_0 = 1.7855e-04
Loss = 1.6399e-03, PNorm = 153.7239, GNorm = 0.0918, lr_0 = 1.7843e-04
Loss = 1.7103e-03, PNorm = 153.7262, GNorm = 0.1067, lr_0 = 1.7830e-04
Loss = 1.5805e-03, PNorm = 153.7302, GNorm = 0.0429, lr_0 = 1.7818e-04
Loss = 1.7598e-03, PNorm = 153.7347, GNorm = 0.0330, lr_0 = 1.7806e-04
Loss = 1.7292e-03, PNorm = 153.7392, GNorm = 0.0935, lr_0 = 1.7794e-04
Loss = 2.8084e-03, PNorm = 153.7452, GNorm = 0.2055, lr_0 = 1.7782e-04
Validation mae = 0.476517
Epoch 23
Loss = 1.5401e-03, PNorm = 153.7479, GNorm = 0.0982, lr_0 = 1.7769e-04
Loss = 1.4236e-03, PNorm = 153.7487, GNorm = 0.0543, lr_0 = 1.7757e-04
Loss = 1.7042e-03, PNorm = 153.7499, GNorm = 0.0416, lr_0 = 1.7745e-04
Loss = 1.3672e-03, PNorm = 153.7518, GNorm = 0.0588, lr_0 = 1.7733e-04
Loss = 1.5827e-03, PNorm = 153.7532, GNorm = 0.1711, lr_0 = 1.7721e-04
Loss = 1.4364e-03, PNorm = 153.7556, GNorm = 0.0630, lr_0 = 1.7709e-04
Loss = 1.2918e-03, PNorm = 153.7581, GNorm = 0.0464, lr_0 = 1.7696e-04
Loss = 1.5319e-03, PNorm = 153.7613, GNorm = 0.0913, lr_0 = 1.7684e-04
Loss = 1.1661e-03, PNorm = 153.7651, GNorm = 0.1529, lr_0 = 1.7672e-04
Loss = 1.3495e-03, PNorm = 153.7674, GNorm = 0.0737, lr_0 = 1.7660e-04
Loss = 2.8401e-03, PNorm = 153.7701, GNorm = 0.1153, lr_0 = 1.7648e-04
Loss = 2.3080e-03, PNorm = 153.7727, GNorm = 0.1662, lr_0 = 1.7636e-04
Loss = 2.6821e-03, PNorm = 153.7771, GNorm = 0.0461, lr_0 = 1.7624e-04
Loss = 1.5501e-03, PNorm = 153.7799, GNorm = 0.0736, lr_0 = 1.7612e-04
Loss = 1.2582e-03, PNorm = 153.7814, GNorm = 0.0324, lr_0 = 1.7600e-04
Loss = 1.6602e-03, PNorm = 153.7839, GNorm = 0.2099, lr_0 = 1.7588e-04
Loss = 3.5465e-03, PNorm = 153.7877, GNorm = 0.0983, lr_0 = 1.7576e-04
Loss = 1.2947e-03, PNorm = 153.7899, GNorm = 0.0442, lr_0 = 1.7564e-04
Loss = 1.1740e-03, PNorm = 153.7930, GNorm = 0.0750, lr_0 = 1.7552e-04
Loss = 1.2187e-03, PNorm = 153.7936, GNorm = 0.1405, lr_0 = 1.7540e-04
Loss = 1.7993e-03, PNorm = 153.7969, GNorm = 0.3023, lr_0 = 1.7528e-04
Loss = 1.4098e-03, PNorm = 153.7996, GNorm = 0.1389, lr_0 = 1.7516e-04
Loss = 1.5388e-03, PNorm = 153.8013, GNorm = 0.0963, lr_0 = 1.7504e-04
Loss = 1.0475e-03, PNorm = 153.8038, GNorm = 0.1082, lr_0 = 1.7492e-04
Loss = 1.4807e-03, PNorm = 153.8063, GNorm = 0.2159, lr_0 = 1.7480e-04
Loss = 1.4110e-03, PNorm = 153.8093, GNorm = 0.0836, lr_0 = 1.7468e-04
Loss = 1.7081e-03, PNorm = 153.8139, GNorm = 0.1353, lr_0 = 1.7456e-04
Loss = 1.5401e-03, PNorm = 153.8175, GNorm = 0.0889, lr_0 = 1.7444e-04
Loss = 3.3977e-03, PNorm = 153.8201, GNorm = 0.2507, lr_0 = 1.7432e-04
Loss = 2.0349e-03, PNorm = 153.8209, GNorm = 0.1798, lr_0 = 1.7420e-04
Loss = 2.1245e-03, PNorm = 153.8224, GNorm = 0.0635, lr_0 = 1.7408e-04
Loss = 1.2758e-03, PNorm = 153.8255, GNorm = 0.0266, lr_0 = 1.7396e-04
Loss = 3.2422e-03, PNorm = 153.8304, GNorm = 0.0675, lr_0 = 1.7384e-04
Loss = 1.2501e-03, PNorm = 153.8323, GNorm = 0.1159, lr_0 = 1.7372e-04
Loss = 1.8785e-03, PNorm = 153.8317, GNorm = 0.0668, lr_0 = 1.7360e-04
Loss = 1.0274e-03, PNorm = 153.8335, GNorm = 0.0810, lr_0 = 1.7348e-04
Loss = 1.2353e-03, PNorm = 153.8364, GNorm = 0.0398, lr_0 = 1.7336e-04
Loss = 2.7088e-03, PNorm = 153.8394, GNorm = 0.1946, lr_0 = 1.7325e-04
Loss = 1.5551e-03, PNorm = 153.8421, GNorm = 0.1464, lr_0 = 1.7313e-04
Loss = 3.0940e-03, PNorm = 153.8453, GNorm = 0.0946, lr_0 = 1.7301e-04
Loss = 2.7623e-03, PNorm = 153.8484, GNorm = 0.1418, lr_0 = 1.7289e-04
Loss = 1.3033e-03, PNorm = 153.8507, GNorm = 0.1582, lr_0 = 1.7277e-04
Loss = 1.0961e-03, PNorm = 153.8542, GNorm = 0.0495, lr_0 = 1.7265e-04
Loss = 1.1900e-03, PNorm = 153.8579, GNorm = 0.1495, lr_0 = 1.7253e-04
Loss = 5.0711e-03, PNorm = 153.8594, GNorm = 0.1708, lr_0 = 1.7242e-04
Loss = 1.6574e-03, PNorm = 153.8638, GNorm = 0.0644, lr_0 = 1.7230e-04
Loss = 2.5919e-03, PNorm = 153.8681, GNorm = 0.1224, lr_0 = 1.7218e-04
Loss = 1.4999e-03, PNorm = 153.8728, GNorm = 0.1395, lr_0 = 1.7206e-04
Loss = 1.2552e-03, PNorm = 153.8750, GNorm = 0.1205, lr_0 = 1.7194e-04
Loss = 1.0935e-03, PNorm = 153.8781, GNorm = 0.0954, lr_0 = 1.7183e-04
Loss = 1.0320e-03, PNorm = 153.8814, GNorm = 0.0347, lr_0 = 1.7171e-04
Loss = 3.0626e-03, PNorm = 153.8846, GNorm = 0.1361, lr_0 = 1.7159e-04
Loss = 1.6083e-03, PNorm = 153.8875, GNorm = 0.2126, lr_0 = 1.7147e-04
Loss = 1.1250e-03, PNorm = 153.8883, GNorm = 0.0722, lr_0 = 1.7136e-04
Loss = 3.9399e-03, PNorm = 153.8878, GNorm = 0.1482, lr_0 = 1.7124e-04
Loss = 1.6243e-03, PNorm = 153.8886, GNorm = 0.4206, lr_0 = 1.7112e-04
Loss = 1.3391e-03, PNorm = 153.8901, GNorm = 0.1516, lr_0 = 1.7100e-04
Loss = 1.3689e-03, PNorm = 153.8942, GNorm = 0.1347, lr_0 = 1.7089e-04
Loss = 1.6277e-03, PNorm = 153.8973, GNorm = 0.0678, lr_0 = 1.7077e-04
Loss = 2.8106e-03, PNorm = 153.9019, GNorm = 0.1670, lr_0 = 1.7065e-04
Loss = 1.1893e-03, PNorm = 153.9051, GNorm = 0.1752, lr_0 = 1.7054e-04
Loss = 1.5282e-03, PNorm = 153.9105, GNorm = 0.0764, lr_0 = 1.7042e-04
Loss = 2.1672e-03, PNorm = 153.9130, GNorm = 0.1094, lr_0 = 1.7030e-04
Loss = 1.3230e-03, PNorm = 153.9184, GNorm = 0.2011, lr_0 = 1.7019e-04
Loss = 1.9874e-03, PNorm = 153.9206, GNorm = 0.1568, lr_0 = 1.7007e-04
Loss = 3.4281e-03, PNorm = 153.9241, GNorm = 0.2398, lr_0 = 1.6995e-04
Loss = 1.8959e-03, PNorm = 153.9254, GNorm = 0.1135, lr_0 = 1.6984e-04
Loss = 1.3190e-03, PNorm = 153.9276, GNorm = 0.0933, lr_0 = 1.6972e-04
Loss = 1.4489e-03, PNorm = 153.9301, GNorm = 0.0576, lr_0 = 1.6960e-04
Loss = 1.4635e-03, PNorm = 153.9333, GNorm = 0.1007, lr_0 = 1.6949e-04
Loss = 1.4877e-03, PNorm = 153.9374, GNorm = 0.1174, lr_0 = 1.6937e-04
Loss = 2.2719e-03, PNorm = 153.9384, GNorm = 0.0560, lr_0 = 1.6926e-04
Loss = 1.7285e-03, PNorm = 153.9398, GNorm = 0.1122, lr_0 = 1.6914e-04
Loss = 1.5149e-03, PNorm = 153.9423, GNorm = 0.1411, lr_0 = 1.6902e-04
Loss = 1.4607e-03, PNorm = 153.9473, GNorm = 0.0970, lr_0 = 1.6891e-04
Loss = 3.0387e-03, PNorm = 153.9525, GNorm = 0.3165, lr_0 = 1.6879e-04
Loss = 1.8203e-03, PNorm = 153.9562, GNorm = 0.0698, lr_0 = 1.6868e-04
Loss = 1.8015e-03, PNorm = 153.9589, GNorm = 0.1105, lr_0 = 1.6856e-04
Loss = 1.5785e-03, PNorm = 153.9614, GNorm = 0.1840, lr_0 = 1.6845e-04
Loss = 2.9036e-03, PNorm = 153.9641, GNorm = 0.0782, lr_0 = 1.6833e-04
Loss = 1.6930e-03, PNorm = 153.9638, GNorm = 0.0345, lr_0 = 1.6821e-04
Loss = 1.9051e-03, PNorm = 153.9644, GNorm = 0.2259, lr_0 = 1.6810e-04
Loss = 1.0063e-03, PNorm = 153.9662, GNorm = 0.0815, lr_0 = 1.6798e-04
Loss = 1.3035e-03, PNorm = 153.9681, GNorm = 0.0530, lr_0 = 1.6787e-04
Loss = 1.8822e-03, PNorm = 153.9705, GNorm = 0.0664, lr_0 = 1.6775e-04
Loss = 1.3134e-03, PNorm = 153.9733, GNorm = 0.1056, lr_0 = 1.6764e-04
Loss = 3.0588e-03, PNorm = 153.9760, GNorm = 0.1013, lr_0 = 1.6752e-04
Loss = 1.3264e-03, PNorm = 153.9785, GNorm = 0.0836, lr_0 = 1.6741e-04
Loss = 1.2694e-03, PNorm = 153.9818, GNorm = 0.0744, lr_0 = 1.6729e-04
Loss = 1.2376e-03, PNorm = 153.9841, GNorm = 0.1427, lr_0 = 1.6718e-04
Loss = 1.4814e-03, PNorm = 153.9876, GNorm = 0.0509, lr_0 = 1.6707e-04
Loss = 1.4101e-03, PNorm = 153.9882, GNorm = 0.0578, lr_0 = 1.6695e-04
Loss = 1.9884e-03, PNorm = 153.9915, GNorm = 0.1085, lr_0 = 1.6684e-04
Loss = 1.3658e-03, PNorm = 153.9953, GNorm = 0.1801, lr_0 = 1.6672e-04
Loss = 2.0251e-03, PNorm = 153.9992, GNorm = 0.0725, lr_0 = 1.6661e-04
Loss = 3.8548e-03, PNorm = 154.0035, GNorm = 0.2771, lr_0 = 1.6649e-04
Loss = 3.6599e-03, PNorm = 154.0071, GNorm = 0.1415, lr_0 = 1.6638e-04
Loss = 1.3337e-03, PNorm = 154.0115, GNorm = 0.0819, lr_0 = 1.6627e-04
Loss = 2.5372e-03, PNorm = 154.0165, GNorm = 0.1737, lr_0 = 1.6615e-04
Loss = 2.2093e-03, PNorm = 154.0225, GNorm = 0.1265, lr_0 = 1.6604e-04
Loss = 1.2635e-03, PNorm = 154.0262, GNorm = 0.0367, lr_0 = 1.6592e-04
Loss = 2.0967e-03, PNorm = 154.0295, GNorm = 0.1496, lr_0 = 1.6581e-04
Loss = 1.1380e-03, PNorm = 154.0329, GNorm = 0.1586, lr_0 = 1.6570e-04
Loss = 2.1053e-03, PNorm = 154.0355, GNorm = 0.3917, lr_0 = 1.6558e-04
Loss = 1.4109e-03, PNorm = 154.0408, GNorm = 0.0802, lr_0 = 1.6547e-04
Loss = 3.5609e-03, PNorm = 154.0427, GNorm = 0.0951, lr_0 = 1.6536e-04
Loss = 2.2880e-03, PNorm = 154.0452, GNorm = 0.1820, lr_0 = 1.6524e-04
Loss = 1.2789e-03, PNorm = 154.0519, GNorm = 0.0838, lr_0 = 1.6513e-04
Loss = 1.5748e-03, PNorm = 154.0585, GNorm = 0.0498, lr_0 = 1.6502e-04
Loss = 1.7980e-03, PNorm = 154.0648, GNorm = 0.0781, lr_0 = 1.6490e-04
Loss = 1.7918e-03, PNorm = 154.0681, GNorm = 0.0297, lr_0 = 1.6479e-04
Loss = 2.0634e-03, PNorm = 154.0708, GNorm = 0.2722, lr_0 = 1.6468e-04
Loss = 3.1875e-03, PNorm = 154.0720, GNorm = 0.1378, lr_0 = 1.6457e-04
Loss = 1.4037e-03, PNorm = 154.0764, GNorm = 0.1015, lr_0 = 1.6445e-04
Loss = 1.9176e-03, PNorm = 154.0766, GNorm = 0.1417, lr_0 = 1.6434e-04
Loss = 1.3438e-03, PNorm = 154.0781, GNorm = 0.1742, lr_0 = 1.6423e-04
Loss = 4.6953e-03, PNorm = 154.0810, GNorm = 0.1061, lr_0 = 1.6412e-04
Loss = 1.6366e-03, PNorm = 154.0833, GNorm = 0.1835, lr_0 = 1.6400e-04
Loss = 1.3098e-03, PNorm = 154.0866, GNorm = 0.0783, lr_0 = 1.6389e-04
Loss = 1.2540e-03, PNorm = 154.0911, GNorm = 0.0283, lr_0 = 1.6378e-04
Validation mae = 0.475695
Epoch 24
Loss = 2.3298e-03, PNorm = 154.0937, GNorm = 0.1302, lr_0 = 1.6367e-04
Loss = 1.2135e-03, PNorm = 154.0938, GNorm = 0.1494, lr_0 = 1.6355e-04
Loss = 1.2087e-03, PNorm = 154.0958, GNorm = 0.1115, lr_0 = 1.6344e-04
Loss = 9.0401e-04, PNorm = 154.0991, GNorm = 0.1283, lr_0 = 1.6333e-04
Loss = 9.7342e-04, PNorm = 154.1008, GNorm = 0.0602, lr_0 = 1.6322e-04
Loss = 1.5570e-03, PNorm = 154.1032, GNorm = 0.0341, lr_0 = 1.6311e-04
Loss = 9.1850e-04, PNorm = 154.1053, GNorm = 0.0355, lr_0 = 1.6299e-04
Loss = 2.9224e-03, PNorm = 154.1081, GNorm = 0.1675, lr_0 = 1.6288e-04
Loss = 1.1696e-03, PNorm = 154.1088, GNorm = 0.0878, lr_0 = 1.6277e-04
Loss = 1.0254e-03, PNorm = 154.1100, GNorm = 0.0925, lr_0 = 1.6266e-04
Loss = 2.6186e-03, PNorm = 154.1111, GNorm = 0.0592, lr_0 = 1.6255e-04
Loss = 9.7238e-04, PNorm = 154.1134, GNorm = 0.0398, lr_0 = 1.6244e-04
Loss = 1.4045e-03, PNorm = 154.1167, GNorm = 0.1132, lr_0 = 1.6233e-04
Loss = 1.3922e-03, PNorm = 154.1187, GNorm = 0.0839, lr_0 = 1.6221e-04
Loss = 2.2623e-03, PNorm = 154.1220, GNorm = 0.0912, lr_0 = 1.6210e-04
Loss = 1.3330e-03, PNorm = 154.1243, GNorm = 0.0983, lr_0 = 1.6199e-04
Loss = 2.5003e-03, PNorm = 154.1279, GNorm = 0.1726, lr_0 = 1.6188e-04
Loss = 1.3180e-03, PNorm = 154.1300, GNorm = 0.1500, lr_0 = 1.6177e-04
Loss = 9.5238e-04, PNorm = 154.1312, GNorm = 0.0400, lr_0 = 1.6166e-04
Loss = 1.2183e-03, PNorm = 154.1318, GNorm = 0.1566, lr_0 = 1.6155e-04
Loss = 4.2454e-03, PNorm = 154.1351, GNorm = 0.0323, lr_0 = 1.6144e-04
Loss = 2.9927e-03, PNorm = 154.1373, GNorm = 0.0768, lr_0 = 1.6133e-04
Loss = 1.4774e-03, PNorm = 154.1408, GNorm = 0.1173, lr_0 = 1.6122e-04
Loss = 2.9983e-03, PNorm = 154.1423, GNorm = 0.1371, lr_0 = 1.6111e-04
Loss = 1.6146e-03, PNorm = 154.1468, GNorm = 0.1045, lr_0 = 1.6100e-04
Loss = 1.6044e-03, PNorm = 154.1491, GNorm = 0.0627, lr_0 = 1.6089e-04
Loss = 2.9123e-03, PNorm = 154.1538, GNorm = 0.1416, lr_0 = 1.6078e-04
Loss = 2.0897e-03, PNorm = 154.1566, GNorm = 0.2092, lr_0 = 1.6067e-04
Loss = 9.5898e-04, PNorm = 154.1601, GNorm = 0.1220, lr_0 = 1.6056e-04
Loss = 1.1907e-03, PNorm = 154.1616, GNorm = 0.0705, lr_0 = 1.6045e-04
Loss = 1.2396e-03, PNorm = 154.1645, GNorm = 0.0444, lr_0 = 1.6034e-04
Loss = 1.4665e-03, PNorm = 154.1658, GNorm = 0.1399, lr_0 = 1.6023e-04
Loss = 1.3379e-03, PNorm = 154.1671, GNorm = 0.1583, lr_0 = 1.6012e-04
Loss = 1.7245e-03, PNorm = 154.1699, GNorm = 0.1468, lr_0 = 1.6001e-04
Loss = 1.8190e-03, PNorm = 154.1742, GNorm = 0.0617, lr_0 = 1.5990e-04
Loss = 3.1376e-03, PNorm = 154.1756, GNorm = 0.3678, lr_0 = 1.5979e-04
Loss = 1.0199e-03, PNorm = 154.1761, GNorm = 0.1406, lr_0 = 1.5968e-04
Loss = 1.6611e-03, PNorm = 154.1773, GNorm = 0.2675, lr_0 = 1.5957e-04
Loss = 1.9112e-03, PNorm = 154.1782, GNorm = 0.2288, lr_0 = 1.5946e-04
Loss = 3.2852e-03, PNorm = 154.1782, GNorm = 0.0813, lr_0 = 1.5935e-04
Loss = 1.2702e-03, PNorm = 154.1809, GNorm = 0.1507, lr_0 = 1.5924e-04
Loss = 1.2085e-03, PNorm = 154.1847, GNorm = 0.0807, lr_0 = 1.5913e-04
Loss = 1.7045e-03, PNorm = 154.1880, GNorm = 0.1885, lr_0 = 1.5902e-04
Loss = 1.0038e-03, PNorm = 154.1921, GNorm = 0.1067, lr_0 = 1.5891e-04
Loss = 1.7420e-03, PNorm = 154.1933, GNorm = 0.1323, lr_0 = 1.5880e-04
Loss = 1.3299e-03, PNorm = 154.1969, GNorm = 0.0294, lr_0 = 1.5870e-04
Loss = 1.0033e-03, PNorm = 154.1995, GNorm = 0.0338, lr_0 = 1.5859e-04
Loss = 1.7526e-03, PNorm = 154.2029, GNorm = 0.0575, lr_0 = 1.5848e-04
Loss = 1.4920e-03, PNorm = 154.2051, GNorm = 0.0815, lr_0 = 1.5837e-04
Loss = 1.6647e-03, PNorm = 154.2069, GNorm = 0.0683, lr_0 = 1.5826e-04
Loss = 2.0672e-03, PNorm = 154.2078, GNorm = 0.0635, lr_0 = 1.5815e-04
Loss = 1.0597e-03, PNorm = 154.2104, GNorm = 0.1410, lr_0 = 1.5804e-04
Loss = 2.2782e-03, PNorm = 154.2125, GNorm = 0.3225, lr_0 = 1.5794e-04
Loss = 1.0302e-03, PNorm = 154.2158, GNorm = 0.0958, lr_0 = 1.5783e-04
Loss = 1.0319e-03, PNorm = 154.2188, GNorm = 0.0477, lr_0 = 1.5772e-04
Loss = 1.0153e-03, PNorm = 154.2203, GNorm = 0.0863, lr_0 = 1.5761e-04
Loss = 1.7547e-03, PNorm = 154.2229, GNorm = 0.1325, lr_0 = 1.5750e-04
Loss = 1.7903e-03, PNorm = 154.2258, GNorm = 0.1172, lr_0 = 1.5740e-04
Loss = 4.7732e-03, PNorm = 154.2270, GNorm = 0.2888, lr_0 = 1.5729e-04
Loss = 3.2687e-03, PNorm = 154.2309, GNorm = 0.4839, lr_0 = 1.5718e-04
Loss = 1.6865e-03, PNorm = 154.2317, GNorm = 0.0577, lr_0 = 1.5707e-04
Loss = 1.2587e-03, PNorm = 154.2352, GNorm = 0.0570, lr_0 = 1.5697e-04
Loss = 1.3614e-03, PNorm = 154.2396, GNorm = 0.0847, lr_0 = 1.5686e-04
Loss = 2.3636e-03, PNorm = 154.2438, GNorm = 0.1980, lr_0 = 1.5675e-04
Loss = 1.0653e-03, PNorm = 154.2441, GNorm = 0.1088, lr_0 = 1.5664e-04
Loss = 1.3492e-03, PNorm = 154.2447, GNorm = 0.0900, lr_0 = 1.5654e-04
Loss = 2.1906e-03, PNorm = 154.2459, GNorm = 0.0826, lr_0 = 1.5643e-04
Loss = 2.0714e-03, PNorm = 154.2490, GNorm = 0.1893, lr_0 = 1.5632e-04
Loss = 2.0390e-03, PNorm = 154.2485, GNorm = 0.0898, lr_0 = 1.5621e-04
Loss = 1.3531e-03, PNorm = 154.2508, GNorm = 0.0890, lr_0 = 1.5611e-04
Loss = 3.1987e-03, PNorm = 154.2548, GNorm = 0.0722, lr_0 = 1.5600e-04
Loss = 1.0697e-03, PNorm = 154.2566, GNorm = 0.1731, lr_0 = 1.5589e-04
Loss = 9.8575e-04, PNorm = 154.2603, GNorm = 0.1751, lr_0 = 1.5579e-04
Loss = 1.3212e-03, PNorm = 154.2616, GNorm = 0.2050, lr_0 = 1.5568e-04
Loss = 7.9436e-04, PNorm = 154.2646, GNorm = 0.0435, lr_0 = 1.5557e-04
Loss = 2.8721e-03, PNorm = 154.2676, GNorm = 0.2433, lr_0 = 1.5547e-04
Loss = 1.6402e-03, PNorm = 154.2714, GNorm = 0.0478, lr_0 = 1.5536e-04
Loss = 9.8352e-04, PNorm = 154.2746, GNorm = 0.0801, lr_0 = 1.5525e-04
Loss = 1.2121e-03, PNorm = 154.2768, GNorm = 0.1578, lr_0 = 1.5515e-04
Loss = 1.3900e-03, PNorm = 154.2791, GNorm = 0.1885, lr_0 = 1.5504e-04
Loss = 2.2777e-03, PNorm = 154.2832, GNorm = 0.0385, lr_0 = 1.5493e-04
Loss = 1.3301e-03, PNorm = 154.2865, GNorm = 0.0857, lr_0 = 1.5483e-04
Loss = 1.2026e-03, PNorm = 154.2901, GNorm = 0.0319, lr_0 = 1.5472e-04
Loss = 2.1751e-03, PNorm = 154.2900, GNorm = 0.1032, lr_0 = 1.5462e-04
Loss = 1.3969e-03, PNorm = 154.2936, GNorm = 0.0766, lr_0 = 1.5451e-04
Loss = 2.5051e-03, PNorm = 154.2968, GNorm = 0.3421, lr_0 = 1.5440e-04
Loss = 1.6528e-03, PNorm = 154.2984, GNorm = 0.0936, lr_0 = 1.5430e-04
Loss = 9.1962e-04, PNorm = 154.3014, GNorm = 0.0694, lr_0 = 1.5419e-04
Loss = 1.6126e-03, PNorm = 154.3056, GNorm = 0.0680, lr_0 = 1.5409e-04
Loss = 1.0975e-03, PNorm = 154.3075, GNorm = 0.0790, lr_0 = 1.5398e-04
Loss = 2.5997e-03, PNorm = 154.3088, GNorm = 0.0381, lr_0 = 1.5388e-04
Loss = 1.8500e-03, PNorm = 154.3092, GNorm = 0.1106, lr_0 = 1.5377e-04
Loss = 9.0473e-04, PNorm = 154.3100, GNorm = 0.0343, lr_0 = 1.5367e-04
Loss = 1.5397e-03, PNorm = 154.3124, GNorm = 0.0430, lr_0 = 1.5356e-04
Loss = 1.1672e-03, PNorm = 154.3157, GNorm = 0.0507, lr_0 = 1.5346e-04
Loss = 1.1615e-03, PNorm = 154.3198, GNorm = 0.0971, lr_0 = 1.5335e-04
Loss = 1.8984e-03, PNorm = 154.3217, GNorm = 0.2535, lr_0 = 1.5325e-04
Loss = 2.6509e-03, PNorm = 154.3229, GNorm = 0.4473, lr_0 = 1.5314e-04
Loss = 2.2412e-03, PNorm = 154.3260, GNorm = 0.2030, lr_0 = 1.5304e-04
Loss = 1.0925e-03, PNorm = 154.3268, GNorm = 0.0565, lr_0 = 1.5293e-04
Loss = 1.1987e-03, PNorm = 154.3299, GNorm = 0.0643, lr_0 = 1.5283e-04
Loss = 1.6130e-03, PNorm = 154.3320, GNorm = 0.1020, lr_0 = 1.5272e-04
Loss = 1.0682e-03, PNorm = 154.3366, GNorm = 0.1045, lr_0 = 1.5262e-04
Loss = 1.1981e-03, PNorm = 154.3388, GNorm = 0.1092, lr_0 = 1.5251e-04
Loss = 1.9492e-03, PNorm = 154.3416, GNorm = 0.0446, lr_0 = 1.5241e-04
Loss = 2.9353e-03, PNorm = 154.3440, GNorm = 0.1984, lr_0 = 1.5230e-04
Loss = 2.2262e-03, PNorm = 154.3471, GNorm = 0.1605, lr_0 = 1.5220e-04
Loss = 2.6594e-03, PNorm = 154.3512, GNorm = 0.1053, lr_0 = 1.5209e-04
Loss = 1.3144e-03, PNorm = 154.3518, GNorm = 0.0985, lr_0 = 1.5199e-04
Loss = 1.4878e-03, PNorm = 154.3516, GNorm = 0.0509, lr_0 = 1.5189e-04
Loss = 1.6875e-03, PNorm = 154.3518, GNorm = 0.3642, lr_0 = 1.5178e-04
Loss = 1.5681e-03, PNorm = 154.3530, GNorm = 0.1168, lr_0 = 1.5168e-04
Loss = 1.7201e-03, PNorm = 154.3571, GNorm = 0.1867, lr_0 = 1.5157e-04
Loss = 1.2476e-03, PNorm = 154.3603, GNorm = 0.1123, lr_0 = 1.5147e-04
Loss = 1.0931e-03, PNorm = 154.3636, GNorm = 0.0683, lr_0 = 1.5137e-04
Loss = 1.2952e-03, PNorm = 154.3675, GNorm = 0.0574, lr_0 = 1.5126e-04
Loss = 1.5075e-03, PNorm = 154.3699, GNorm = 0.0742, lr_0 = 1.5116e-04
Loss = 2.2770e-03, PNorm = 154.3713, GNorm = 0.0438, lr_0 = 1.5106e-04
Loss = 1.2573e-03, PNorm = 154.3744, GNorm = 0.0714, lr_0 = 1.5095e-04
Loss = 1.6184e-03, PNorm = 154.3770, GNorm = 0.0444, lr_0 = 1.5085e-04
Validation mae = 0.476381
Epoch 25
Loss = 1.2992e-03, PNorm = 154.3789, GNorm = 0.0334, lr_0 = 1.5075e-04
Loss = 3.6360e-03, PNorm = 154.3790, GNorm = 0.2551, lr_0 = 1.5064e-04
Loss = 1.0233e-03, PNorm = 154.3789, GNorm = 0.0654, lr_0 = 1.5054e-04
Loss = 1.3215e-03, PNorm = 154.3811, GNorm = 0.1710, lr_0 = 1.5044e-04
Loss = 7.2181e-04, PNorm = 154.3829, GNorm = 0.0526, lr_0 = 1.5033e-04
Loss = 7.6913e-04, PNorm = 154.3861, GNorm = 0.0361, lr_0 = 1.5023e-04
Loss = 8.6672e-04, PNorm = 154.3886, GNorm = 0.0929, lr_0 = 1.5013e-04
Loss = 8.2669e-04, PNorm = 154.3887, GNorm = 0.0621, lr_0 = 1.5002e-04
Loss = 1.3364e-03, PNorm = 154.3892, GNorm = 0.0851, lr_0 = 1.4992e-04
Loss = 8.9395e-04, PNorm = 154.3901, GNorm = 0.2822, lr_0 = 1.4982e-04
Loss = 2.4956e-03, PNorm = 154.3930, GNorm = 0.4173, lr_0 = 1.4972e-04
Loss = 7.1526e-04, PNorm = 154.3961, GNorm = 0.0563, lr_0 = 1.4961e-04
Loss = 1.7768e-03, PNorm = 154.3986, GNorm = 0.1376, lr_0 = 1.4951e-04
Loss = 2.4337e-03, PNorm = 154.4025, GNorm = 0.2347, lr_0 = 1.4941e-04
Loss = 1.9770e-03, PNorm = 154.4051, GNorm = 0.1190, lr_0 = 1.4931e-04
Loss = 2.0773e-03, PNorm = 154.4068, GNorm = 0.0518, lr_0 = 1.4920e-04
Loss = 1.5695e-03, PNorm = 154.4085, GNorm = 0.1402, lr_0 = 1.4910e-04
Loss = 8.9650e-04, PNorm = 154.4100, GNorm = 0.0391, lr_0 = 1.4900e-04
Loss = 8.2246e-04, PNorm = 154.4112, GNorm = 0.0971, lr_0 = 1.4890e-04
Loss = 1.0985e-03, PNorm = 154.4111, GNorm = 0.0851, lr_0 = 1.4880e-04
Loss = 3.7868e-03, PNorm = 154.4133, GNorm = 0.0587, lr_0 = 1.4869e-04
Loss = 2.0032e-03, PNorm = 154.4154, GNorm = 0.1168, lr_0 = 1.4859e-04
Loss = 2.7223e-03, PNorm = 154.4174, GNorm = 0.0413, lr_0 = 1.4849e-04
Loss = 9.7223e-04, PNorm = 154.4206, GNorm = 0.0381, lr_0 = 1.4839e-04
Loss = 9.1105e-04, PNorm = 154.4224, GNorm = 0.1221, lr_0 = 1.4829e-04
Loss = 8.9920e-04, PNorm = 154.4241, GNorm = 0.0549, lr_0 = 1.4818e-04
Loss = 1.4560e-03, PNorm = 154.4248, GNorm = 0.0527, lr_0 = 1.4808e-04
Loss = 1.1689e-03, PNorm = 154.4272, GNorm = 0.1303, lr_0 = 1.4798e-04
Loss = 1.2328e-03, PNorm = 154.4282, GNorm = 0.0656, lr_0 = 1.4788e-04
Loss = 8.4817e-04, PNorm = 154.4289, GNorm = 0.0625, lr_0 = 1.4778e-04
Loss = 8.5000e-04, PNorm = 154.4284, GNorm = 0.0729, lr_0 = 1.4768e-04
Loss = 8.5280e-04, PNorm = 154.4300, GNorm = 0.1497, lr_0 = 1.4758e-04
Loss = 2.0710e-03, PNorm = 154.4327, GNorm = 0.0604, lr_0 = 1.4748e-04
Loss = 1.1220e-03, PNorm = 154.4346, GNorm = 0.0781, lr_0 = 1.4737e-04
Loss = 1.6095e-03, PNorm = 154.4374, GNorm = 0.0852, lr_0 = 1.4727e-04
Loss = 8.7335e-04, PNorm = 154.4373, GNorm = 0.0644, lr_0 = 1.4717e-04
Loss = 1.0494e-03, PNorm = 154.4381, GNorm = 0.0801, lr_0 = 1.4707e-04
Loss = 6.6471e-04, PNorm = 154.4387, GNorm = 0.1293, lr_0 = 1.4697e-04
Loss = 6.8102e-04, PNorm = 154.4403, GNorm = 0.0536, lr_0 = 1.4687e-04
Loss = 1.0001e-03, PNorm = 154.4431, GNorm = 0.1294, lr_0 = 1.4677e-04
Loss = 1.1735e-03, PNorm = 154.4481, GNorm = 0.0921, lr_0 = 1.4667e-04
Loss = 2.2891e-03, PNorm = 154.4515, GNorm = 0.0439, lr_0 = 1.4657e-04
Loss = 8.4179e-04, PNorm = 154.4538, GNorm = 0.1108, lr_0 = 1.4647e-04
Loss = 1.1134e-03, PNorm = 154.4564, GNorm = 0.1384, lr_0 = 1.4637e-04
Loss = 2.5900e-03, PNorm = 154.4583, GNorm = 0.2121, lr_0 = 1.4627e-04
Loss = 1.6025e-03, PNorm = 154.4601, GNorm = 0.1549, lr_0 = 1.4617e-04
Loss = 8.3383e-04, PNorm = 154.4624, GNorm = 0.1427, lr_0 = 1.4607e-04
Loss = 8.1939e-04, PNorm = 154.4648, GNorm = 0.1021, lr_0 = 1.4597e-04
Loss = 3.8393e-03, PNorm = 154.4658, GNorm = 0.8797, lr_0 = 1.4587e-04
Loss = 1.5049e-03, PNorm = 154.4681, GNorm = 0.1383, lr_0 = 1.4577e-04
Loss = 1.0549e-03, PNorm = 154.4698, GNorm = 0.0484, lr_0 = 1.4567e-04
Loss = 1.3098e-03, PNorm = 154.4711, GNorm = 0.1521, lr_0 = 1.4557e-04
Loss = 1.0318e-03, PNorm = 154.4721, GNorm = 0.2140, lr_0 = 1.4547e-04
Loss = 8.0095e-04, PNorm = 154.4763, GNorm = 0.0645, lr_0 = 1.4537e-04
Loss = 1.4674e-03, PNorm = 154.4793, GNorm = 0.1536, lr_0 = 1.4527e-04
Loss = 8.6273e-04, PNorm = 154.4825, GNorm = 0.0709, lr_0 = 1.4517e-04
Loss = 1.2023e-03, PNorm = 154.4859, GNorm = 0.1120, lr_0 = 1.4507e-04
Loss = 9.9504e-04, PNorm = 154.4906, GNorm = 0.1066, lr_0 = 1.4497e-04
Loss = 1.4464e-03, PNorm = 154.4927, GNorm = 0.0802, lr_0 = 1.4487e-04
Loss = 1.4238e-03, PNorm = 154.4934, GNorm = 0.1597, lr_0 = 1.4477e-04
Loss = 3.4432e-03, PNorm = 154.4948, GNorm = 0.3040, lr_0 = 1.4467e-04
Loss = 1.2439e-03, PNorm = 154.4940, GNorm = 0.1803, lr_0 = 1.4457e-04
Loss = 1.4031e-03, PNorm = 154.4962, GNorm = 0.0569, lr_0 = 1.4447e-04
Loss = 9.3601e-04, PNorm = 154.4976, GNorm = 0.1734, lr_0 = 1.4438e-04
Loss = 9.0506e-04, PNorm = 154.4984, GNorm = 0.0667, lr_0 = 1.4428e-04
Loss = 1.8934e-03, PNorm = 154.5011, GNorm = 0.0687, lr_0 = 1.4418e-04
Loss = 2.3896e-03, PNorm = 154.5027, GNorm = 0.1432, lr_0 = 1.4408e-04
Loss = 8.5240e-04, PNorm = 154.5062, GNorm = 0.1083, lr_0 = 1.4398e-04
Loss = 3.3041e-03, PNorm = 154.5086, GNorm = 0.0518, lr_0 = 1.4388e-04
Loss = 1.3238e-03, PNorm = 154.5118, GNorm = 0.1678, lr_0 = 1.4378e-04
Loss = 3.0762e-03, PNorm = 154.5152, GNorm = 0.4917, lr_0 = 1.4368e-04
Loss = 1.1721e-03, PNorm = 154.5173, GNorm = 0.0724, lr_0 = 1.4359e-04
Loss = 1.3262e-03, PNorm = 154.5200, GNorm = 0.3053, lr_0 = 1.4349e-04
Loss = 1.1139e-03, PNorm = 154.5210, GNorm = 0.1031, lr_0 = 1.4339e-04
Loss = 1.7767e-03, PNorm = 154.5219, GNorm = 0.0783, lr_0 = 1.4329e-04
Loss = 3.2707e-03, PNorm = 154.5223, GNorm = 0.1054, lr_0 = 1.4319e-04
Loss = 1.2564e-03, PNorm = 154.5225, GNorm = 0.0589, lr_0 = 1.4310e-04
Loss = 3.2345e-03, PNorm = 154.5262, GNorm = 0.2245, lr_0 = 1.4300e-04
Loss = 8.2529e-04, PNorm = 154.5280, GNorm = 0.0633, lr_0 = 1.4290e-04
Loss = 7.7434e-04, PNorm = 154.5308, GNorm = 0.1126, lr_0 = 1.4280e-04
Loss = 1.4587e-03, PNorm = 154.5306, GNorm = 0.0741, lr_0 = 1.4270e-04
Loss = 1.1004e-03, PNorm = 154.5323, GNorm = 0.0826, lr_0 = 1.4261e-04
Loss = 1.4896e-03, PNorm = 154.5343, GNorm = 0.1876, lr_0 = 1.4251e-04
Loss = 1.1793e-03, PNorm = 154.5367, GNorm = 0.1525, lr_0 = 1.4241e-04
Loss = 1.8548e-03, PNorm = 154.5401, GNorm = 0.1163, lr_0 = 1.4231e-04
Loss = 9.0066e-04, PNorm = 154.5447, GNorm = 0.1154, lr_0 = 1.4222e-04
Loss = 3.8745e-03, PNorm = 154.5455, GNorm = 0.0980, lr_0 = 1.4212e-04
Loss = 1.3419e-03, PNorm = 154.5488, GNorm = 0.0324, lr_0 = 1.4202e-04
Loss = 2.8045e-03, PNorm = 154.5530, GNorm = 0.0643, lr_0 = 1.4192e-04
Loss = 1.0061e-03, PNorm = 154.5551, GNorm = 0.1402, lr_0 = 1.4183e-04
Loss = 1.8205e-03, PNorm = 154.5571, GNorm = 0.0998, lr_0 = 1.4173e-04
Loss = 2.1020e-03, PNorm = 154.5580, GNorm = 0.1629, lr_0 = 1.4163e-04
Loss = 9.6072e-04, PNorm = 154.5604, GNorm = 0.0427, lr_0 = 1.4153e-04
Loss = 9.6591e-04, PNorm = 154.5631, GNorm = 0.0592, lr_0 = 1.4144e-04
Loss = 5.0708e-03, PNorm = 154.5643, GNorm = 0.2705, lr_0 = 1.4134e-04
Loss = 2.4966e-03, PNorm = 154.5692, GNorm = 0.0544, lr_0 = 1.4124e-04
Loss = 1.3583e-03, PNorm = 154.5704, GNorm = 0.0850, lr_0 = 1.4115e-04
Loss = 1.8302e-03, PNorm = 154.5726, GNorm = 0.2314, lr_0 = 1.4105e-04
Loss = 1.1209e-03, PNorm = 154.5751, GNorm = 0.1678, lr_0 = 1.4095e-04
Loss = 1.5852e-03, PNorm = 154.5767, GNorm = 0.1391, lr_0 = 1.4086e-04
Loss = 2.2266e-03, PNorm = 154.5808, GNorm = 0.0679, lr_0 = 1.4076e-04
Loss = 2.0987e-03, PNorm = 154.5843, GNorm = 0.0754, lr_0 = 1.4066e-04
Loss = 1.7609e-03, PNorm = 154.5845, GNorm = 0.1179, lr_0 = 1.4057e-04
Loss = 1.0624e-03, PNorm = 154.5879, GNorm = 0.1942, lr_0 = 1.4047e-04
Loss = 9.6173e-04, PNorm = 154.5895, GNorm = 0.0384, lr_0 = 1.4038e-04
Loss = 1.1334e-03, PNorm = 154.5927, GNorm = 0.1018, lr_0 = 1.4028e-04
Loss = 1.6710e-03, PNorm = 154.5955, GNorm = 0.0734, lr_0 = 1.4018e-04
Loss = 7.9811e-04, PNorm = 154.5982, GNorm = 0.0428, lr_0 = 1.4009e-04
Loss = 1.2213e-03, PNorm = 154.6010, GNorm = 0.0370, lr_0 = 1.3999e-04
Loss = 1.1232e-03, PNorm = 154.6039, GNorm = 0.0721, lr_0 = 1.3990e-04
Loss = 1.6444e-03, PNorm = 154.6065, GNorm = 0.1006, lr_0 = 1.3980e-04
Loss = 1.6228e-03, PNorm = 154.6075, GNorm = 0.1102, lr_0 = 1.3970e-04
Loss = 1.7682e-03, PNorm = 154.6113, GNorm = 0.0960, lr_0 = 1.3961e-04
Loss = 1.1201e-03, PNorm = 154.6127, GNorm = 0.0761, lr_0 = 1.3951e-04
Loss = 1.0101e-03, PNorm = 154.6158, GNorm = 0.1366, lr_0 = 1.3942e-04
Loss = 1.2982e-03, PNorm = 154.6189, GNorm = 0.0635, lr_0 = 1.3932e-04
Loss = 9.8038e-04, PNorm = 154.6227, GNorm = 0.1309, lr_0 = 1.3923e-04
Loss = 1.0389e-03, PNorm = 154.6239, GNorm = 0.0471, lr_0 = 1.3913e-04
Loss = 1.9355e-03, PNorm = 154.6247, GNorm = 0.0707, lr_0 = 1.3904e-04
Loss = 1.9705e-03, PNorm = 154.6258, GNorm = 0.1281, lr_0 = 1.3894e-04
Validation mae = 0.476175
Epoch 26
Loss = 6.6147e-04, PNorm = 154.6266, GNorm = 0.0645, lr_0 = 1.3884e-04
Loss = 1.9204e-03, PNorm = 154.6279, GNorm = 0.0572, lr_0 = 1.3875e-04
Loss = 1.5529e-03, PNorm = 154.6281, GNorm = 0.1377, lr_0 = 1.3865e-04
Loss = 1.3104e-03, PNorm = 154.6306, GNorm = 0.2584, lr_0 = 1.3856e-04
Loss = 6.4227e-04, PNorm = 154.6325, GNorm = 0.0951, lr_0 = 1.3846e-04
Loss = 8.1231e-04, PNorm = 154.6342, GNorm = 0.1263, lr_0 = 1.3837e-04
Loss = 9.6091e-04, PNorm = 154.6359, GNorm = 0.0581, lr_0 = 1.3828e-04
Loss = 8.5013e-04, PNorm = 154.6373, GNorm = 0.0352, lr_0 = 1.3818e-04
Loss = 1.2447e-03, PNorm = 154.6381, GNorm = 0.2023, lr_0 = 1.3809e-04
Loss = 2.5129e-03, PNorm = 154.6386, GNorm = 0.0644, lr_0 = 1.3799e-04
Loss = 2.2847e-03, PNorm = 154.6404, GNorm = 0.0848, lr_0 = 1.3790e-04
Loss = 8.2773e-04, PNorm = 154.6414, GNorm = 0.2353, lr_0 = 1.3780e-04
Loss = 1.8243e-03, PNorm = 154.6425, GNorm = 0.0334, lr_0 = 1.3771e-04
Loss = 9.1472e-04, PNorm = 154.6446, GNorm = 0.0897, lr_0 = 1.3761e-04
Loss = 9.2548e-04, PNorm = 154.6462, GNorm = 0.1474, lr_0 = 1.3752e-04
Loss = 1.4340e-03, PNorm = 154.6485, GNorm = 0.0528, lr_0 = 1.3742e-04
Loss = 1.0802e-03, PNorm = 154.6491, GNorm = 0.0969, lr_0 = 1.3733e-04
Loss = 8.9008e-04, PNorm = 154.6500, GNorm = 0.0820, lr_0 = 1.3724e-04
Loss = 7.1112e-04, PNorm = 154.6511, GNorm = 0.0465, lr_0 = 1.3714e-04
Loss = 1.3802e-03, PNorm = 154.6537, GNorm = 0.0685, lr_0 = 1.3705e-04
Loss = 1.1587e-03, PNorm = 154.6572, GNorm = 0.1359, lr_0 = 1.3695e-04
Loss = 2.0380e-03, PNorm = 154.6602, GNorm = 0.2210, lr_0 = 1.3686e-04
Loss = 7.8080e-04, PNorm = 154.6621, GNorm = 0.1640, lr_0 = 1.3677e-04
Loss = 1.0303e-03, PNorm = 154.6638, GNorm = 0.0873, lr_0 = 1.3667e-04
Loss = 8.0463e-04, PNorm = 154.6640, GNorm = 0.1146, lr_0 = 1.3658e-04
Loss = 1.5710e-03, PNorm = 154.6645, GNorm = 0.0840, lr_0 = 1.3649e-04
Loss = 1.3075e-03, PNorm = 154.6652, GNorm = 0.0839, lr_0 = 1.3639e-04
Loss = 8.1363e-04, PNorm = 154.6666, GNorm = 0.0583, lr_0 = 1.3630e-04
Loss = 1.9898e-03, PNorm = 154.6677, GNorm = 0.0334, lr_0 = 1.3621e-04
Loss = 1.4469e-03, PNorm = 154.6691, GNorm = 0.0700, lr_0 = 1.3611e-04
Loss = 1.3400e-03, PNorm = 154.6708, GNorm = 0.0851, lr_0 = 1.3602e-04
Loss = 6.2724e-04, PNorm = 154.6733, GNorm = 0.1304, lr_0 = 1.3593e-04
Loss = 1.8822e-03, PNorm = 154.6758, GNorm = 0.0336, lr_0 = 1.3583e-04
Loss = 2.3564e-03, PNorm = 154.6782, GNorm = 0.0920, lr_0 = 1.3574e-04
Loss = 1.4317e-03, PNorm = 154.6784, GNorm = 0.0349, lr_0 = 1.3565e-04
Loss = 6.9601e-04, PNorm = 154.6802, GNorm = 0.0489, lr_0 = 1.3555e-04
Loss = 1.0094e-03, PNorm = 154.6812, GNorm = 0.0315, lr_0 = 1.3546e-04
Loss = 4.4986e-03, PNorm = 154.6834, GNorm = 0.0967, lr_0 = 1.3537e-04
Loss = 1.0080e-03, PNorm = 154.6844, GNorm = 0.1067, lr_0 = 1.3528e-04
Loss = 9.3237e-04, PNorm = 154.6856, GNorm = 0.1266, lr_0 = 1.3518e-04
Loss = 8.1366e-04, PNorm = 154.6865, GNorm = 0.2093, lr_0 = 1.3509e-04
Loss = 1.9917e-03, PNorm = 154.6897, GNorm = 0.1298, lr_0 = 1.3500e-04
Loss = 1.6280e-03, PNorm = 154.6918, GNorm = 0.1958, lr_0 = 1.3491e-04
Loss = 1.3207e-03, PNorm = 154.6938, GNorm = 0.0318, lr_0 = 1.3481e-04
Loss = 1.2479e-03, PNorm = 154.6952, GNorm = 0.0731, lr_0 = 1.3472e-04
Loss = 9.5966e-04, PNorm = 154.6964, GNorm = 0.0620, lr_0 = 1.3463e-04
Loss = 8.0987e-04, PNorm = 154.6962, GNorm = 0.0805, lr_0 = 1.3454e-04
Loss = 6.5663e-04, PNorm = 154.6972, GNorm = 0.0728, lr_0 = 1.3444e-04
Loss = 7.0319e-04, PNorm = 154.6981, GNorm = 0.1144, lr_0 = 1.3435e-04
Loss = 1.6549e-03, PNorm = 154.7007, GNorm = 0.0608, lr_0 = 1.3426e-04
Loss = 7.0229e-04, PNorm = 154.7018, GNorm = 0.0897, lr_0 = 1.3417e-04
Loss = 1.1880e-03, PNorm = 154.7041, GNorm = 0.1381, lr_0 = 1.3408e-04
Loss = 1.4947e-03, PNorm = 154.7059, GNorm = 0.0533, lr_0 = 1.3398e-04
Loss = 1.7354e-03, PNorm = 154.7084, GNorm = 0.0582, lr_0 = 1.3389e-04
Loss = 7.5249e-04, PNorm = 154.7115, GNorm = 0.1222, lr_0 = 1.3380e-04
Loss = 8.1515e-04, PNorm = 154.7115, GNorm = 0.1449, lr_0 = 1.3371e-04
Loss = 1.4308e-03, PNorm = 154.7138, GNorm = 0.0363, lr_0 = 1.3362e-04
Loss = 1.0045e-03, PNorm = 154.7145, GNorm = 0.0726, lr_0 = 1.3353e-04
Loss = 7.5052e-04, PNorm = 154.7142, GNorm = 0.0409, lr_0 = 1.3343e-04
Loss = 6.9178e-04, PNorm = 154.7149, GNorm = 0.0691, lr_0 = 1.3334e-04
Loss = 1.0673e-03, PNorm = 154.7170, GNorm = 0.1728, lr_0 = 1.3325e-04
Loss = 1.6596e-03, PNorm = 154.7196, GNorm = 0.1040, lr_0 = 1.3316e-04
Loss = 1.4059e-03, PNorm = 154.7233, GNorm = 0.0447, lr_0 = 1.3307e-04
Loss = 1.6188e-03, PNorm = 154.7251, GNorm = 0.0685, lr_0 = 1.3298e-04
Loss = 6.7494e-04, PNorm = 154.7276, GNorm = 0.1089, lr_0 = 1.3289e-04
Loss = 8.5469e-04, PNorm = 154.7296, GNorm = 0.1001, lr_0 = 1.3280e-04
Loss = 1.3507e-03, PNorm = 154.7293, GNorm = 0.1478, lr_0 = 1.3270e-04
Loss = 1.0644e-03, PNorm = 154.7319, GNorm = 0.0664, lr_0 = 1.3261e-04
Loss = 7.0054e-04, PNorm = 154.7330, GNorm = 0.1108, lr_0 = 1.3252e-04
Loss = 1.5292e-03, PNorm = 154.7347, GNorm = 0.2045, lr_0 = 1.3243e-04
Loss = 1.4187e-03, PNorm = 154.7370, GNorm = 0.0857, lr_0 = 1.3234e-04
Loss = 1.6240e-03, PNorm = 154.7382, GNorm = 0.0833, lr_0 = 1.3225e-04
Loss = 2.7459e-03, PNorm = 154.7415, GNorm = 0.0851, lr_0 = 1.3216e-04
Loss = 1.7706e-03, PNorm = 154.7430, GNorm = 0.0505, lr_0 = 1.3207e-04
Loss = 1.6222e-03, PNorm = 154.7443, GNorm = 0.0748, lr_0 = 1.3198e-04
Loss = 1.7965e-03, PNorm = 154.7460, GNorm = 0.2380, lr_0 = 1.3189e-04
Loss = 1.0752e-03, PNorm = 154.7483, GNorm = 0.0666, lr_0 = 1.3180e-04
Loss = 8.2038e-04, PNorm = 154.7503, GNorm = 0.1817, lr_0 = 1.3171e-04
Loss = 1.1261e-03, PNorm = 154.7521, GNorm = 0.1810, lr_0 = 1.3162e-04
Loss = 2.3261e-03, PNorm = 154.7539, GNorm = 0.0646, lr_0 = 1.3153e-04
Loss = 2.9360e-03, PNorm = 154.7561, GNorm = 0.0330, lr_0 = 1.3144e-04
Loss = 2.3967e-03, PNorm = 154.7564, GNorm = 0.2716, lr_0 = 1.3135e-04
Loss = 9.6029e-04, PNorm = 154.7585, GNorm = 0.0270, lr_0 = 1.3126e-04
Loss = 1.9100e-03, PNorm = 154.7598, GNorm = 0.1087, lr_0 = 1.3117e-04
Loss = 1.7005e-03, PNorm = 154.7617, GNorm = 0.0753, lr_0 = 1.3108e-04
Loss = 1.0384e-03, PNorm = 154.7633, GNorm = 0.0726, lr_0 = 1.3099e-04
Loss = 2.1338e-03, PNorm = 154.7660, GNorm = 0.0977, lr_0 = 1.3090e-04
Loss = 1.5661e-03, PNorm = 154.7688, GNorm = 0.1047, lr_0 = 1.3081e-04
Loss = 8.2834e-04, PNorm = 154.7713, GNorm = 0.0662, lr_0 = 1.3072e-04
Loss = 9.1507e-04, PNorm = 154.7724, GNorm = 0.0481, lr_0 = 1.3063e-04
Loss = 1.0464e-03, PNorm = 154.7728, GNorm = 0.2477, lr_0 = 1.3054e-04
Loss = 1.0379e-03, PNorm = 154.7733, GNorm = 0.0683, lr_0 = 1.3045e-04
Loss = 2.3471e-03, PNorm = 154.7736, GNorm = 0.1199, lr_0 = 1.3036e-04
Loss = 2.1281e-03, PNorm = 154.7742, GNorm = 0.4039, lr_0 = 1.3027e-04
Loss = 3.5767e-03, PNorm = 154.7750, GNorm = 0.0911, lr_0 = 1.3018e-04
Loss = 1.0326e-03, PNorm = 154.7783, GNorm = 0.0639, lr_0 = 1.3009e-04
Loss = 1.1256e-03, PNorm = 154.7795, GNorm = 0.0863, lr_0 = 1.3000e-04
Loss = 7.5216e-04, PNorm = 154.7824, GNorm = 0.1461, lr_0 = 1.2992e-04
Loss = 3.4621e-03, PNorm = 154.7858, GNorm = 0.0237, lr_0 = 1.2983e-04
Loss = 1.8727e-03, PNorm = 154.7864, GNorm = 0.0942, lr_0 = 1.2974e-04
Loss = 1.7409e-03, PNorm = 154.7889, GNorm = 0.1298, lr_0 = 1.2965e-04
Loss = 7.7381e-04, PNorm = 154.7897, GNorm = 0.1182, lr_0 = 1.2956e-04
Loss = 7.1775e-04, PNorm = 154.7918, GNorm = 0.2206, lr_0 = 1.2947e-04
Loss = 1.7424e-03, PNorm = 154.7947, GNorm = 0.0246, lr_0 = 1.2938e-04
Loss = 1.6119e-03, PNorm = 154.7984, GNorm = 0.0768, lr_0 = 1.2929e-04
Loss = 9.1688e-04, PNorm = 154.8005, GNorm = 0.0663, lr_0 = 1.2921e-04
Loss = 2.9196e-03, PNorm = 154.8009, GNorm = 0.1591, lr_0 = 1.2912e-04
Loss = 8.1310e-04, PNorm = 154.8007, GNorm = 0.1584, lr_0 = 1.2903e-04
Loss = 7.0979e-04, PNorm = 154.8011, GNorm = 0.0825, lr_0 = 1.2894e-04
Loss = 8.3438e-04, PNorm = 154.8024, GNorm = 0.1184, lr_0 = 1.2885e-04
Loss = 1.4307e-03, PNorm = 154.8037, GNorm = 0.0484, lr_0 = 1.2876e-04
Loss = 1.8244e-03, PNorm = 154.8067, GNorm = 0.1499, lr_0 = 1.2867e-04
Loss = 1.8534e-03, PNorm = 154.8087, GNorm = 0.0749, lr_0 = 1.2859e-04
Loss = 1.8955e-03, PNorm = 154.8114, GNorm = 0.0783, lr_0 = 1.2850e-04
Loss = 1.9734e-03, PNorm = 154.8122, GNorm = 0.1472, lr_0 = 1.2841e-04
Loss = 1.7371e-03, PNorm = 154.8152, GNorm = 0.0285, lr_0 = 1.2832e-04
Loss = 6.4806e-04, PNorm = 154.8172, GNorm = 0.0862, lr_0 = 1.2823e-04
Loss = 1.5959e-03, PNorm = 154.8210, GNorm = 0.1481, lr_0 = 1.2815e-04
Loss = 1.3683e-03, PNorm = 154.8212, GNorm = 0.0446, lr_0 = 1.2806e-04
Loss = 1.9484e-03, PNorm = 154.8221, GNorm = 0.1267, lr_0 = 1.2797e-04
Validation mae = 0.475457
Epoch 27
Loss = 1.1210e-03, PNorm = 154.8222, GNorm = 0.0345, lr_0 = 1.2788e-04
Loss = 9.9402e-04, PNorm = 154.8247, GNorm = 0.0315, lr_0 = 1.2780e-04
Loss = 7.2465e-04, PNorm = 154.8279, GNorm = 0.1381, lr_0 = 1.2771e-04
Loss = 7.2654e-04, PNorm = 154.8295, GNorm = 0.0464, lr_0 = 1.2762e-04
Loss = 8.7307e-04, PNorm = 154.8318, GNorm = 0.1043, lr_0 = 1.2753e-04
Loss = 8.0389e-04, PNorm = 154.8327, GNorm = 0.0530, lr_0 = 1.2745e-04
Loss = 8.8070e-04, PNorm = 154.8356, GNorm = 0.0362, lr_0 = 1.2736e-04
Loss = 9.4249e-04, PNorm = 154.8365, GNorm = 0.0500, lr_0 = 1.2727e-04
Loss = 1.3631e-03, PNorm = 154.8383, GNorm = 0.0318, lr_0 = 1.2718e-04
Loss = 8.4395e-04, PNorm = 154.8403, GNorm = 0.0727, lr_0 = 1.2710e-04
Loss = 9.3702e-04, PNorm = 154.8420, GNorm = 0.0997, lr_0 = 1.2701e-04
Loss = 1.1278e-03, PNorm = 154.8439, GNorm = 0.0365, lr_0 = 1.2692e-04
Loss = 6.3878e-04, PNorm = 154.8454, GNorm = 0.0380, lr_0 = 1.2684e-04
Loss = 9.3146e-04, PNorm = 154.8471, GNorm = 0.1423, lr_0 = 1.2675e-04
Loss = 6.1847e-04, PNorm = 154.8488, GNorm = 0.0822, lr_0 = 1.2666e-04
Loss = 6.5671e-04, PNorm = 154.8483, GNorm = 0.0716, lr_0 = 1.2658e-04
Loss = 1.7174e-03, PNorm = 154.8482, GNorm = 0.0749, lr_0 = 1.2649e-04
Loss = 1.2217e-03, PNorm = 154.8503, GNorm = 0.0240, lr_0 = 1.2640e-04
Loss = 1.0237e-03, PNorm = 154.8507, GNorm = 0.0556, lr_0 = 1.2632e-04
Loss = 1.8301e-03, PNorm = 154.8530, GNorm = 0.1666, lr_0 = 1.2623e-04
Loss = 8.3716e-04, PNorm = 154.8559, GNorm = 0.0505, lr_0 = 1.2614e-04
Loss = 6.0866e-04, PNorm = 154.8569, GNorm = 0.1067, lr_0 = 1.2606e-04
Loss = 1.0165e-03, PNorm = 154.8569, GNorm = 0.1272, lr_0 = 1.2597e-04
Loss = 9.1138e-04, PNorm = 154.8581, GNorm = 0.0997, lr_0 = 1.2588e-04
Loss = 5.3658e-04, PNorm = 154.8599, GNorm = 0.0308, lr_0 = 1.2580e-04
Loss = 5.6796e-04, PNorm = 154.8611, GNorm = 0.0242, lr_0 = 1.2571e-04
Loss = 6.2650e-04, PNorm = 154.8627, GNorm = 0.0641, lr_0 = 1.2563e-04
Loss = 1.8773e-03, PNorm = 154.8635, GNorm = 0.0972, lr_0 = 1.2554e-04
Loss = 1.1513e-03, PNorm = 154.8660, GNorm = 0.0876, lr_0 = 1.2545e-04
Loss = 7.7972e-04, PNorm = 154.8683, GNorm = 0.0636, lr_0 = 1.2537e-04
Loss = 2.5471e-03, PNorm = 154.8715, GNorm = 0.0872, lr_0 = 1.2528e-04
Loss = 5.5584e-04, PNorm = 154.8752, GNorm = 0.0470, lr_0 = 1.2520e-04
Loss = 1.4857e-03, PNorm = 154.8779, GNorm = 0.1199, lr_0 = 1.2511e-04
Loss = 9.6079e-04, PNorm = 154.8785, GNorm = 0.0953, lr_0 = 1.2502e-04
Loss = 1.2866e-03, PNorm = 154.8796, GNorm = 0.0940, lr_0 = 1.2494e-04
Loss = 1.4671e-03, PNorm = 154.8814, GNorm = 0.2429, lr_0 = 1.2485e-04
Loss = 1.5635e-03, PNorm = 154.8820, GNorm = 0.0681, lr_0 = 1.2477e-04
Loss = 2.4435e-03, PNorm = 154.8812, GNorm = 0.0891, lr_0 = 1.2468e-04
Loss = 2.5322e-03, PNorm = 154.8817, GNorm = 0.1092, lr_0 = 1.2460e-04
Loss = 6.3892e-04, PNorm = 154.8839, GNorm = 0.0563, lr_0 = 1.2451e-04
Loss = 7.6313e-04, PNorm = 154.8855, GNorm = 0.0423, lr_0 = 1.2443e-04
Loss = 6.7747e-04, PNorm = 154.8898, GNorm = 0.0379, lr_0 = 1.2434e-04
Loss = 2.8536e-03, PNorm = 154.8915, GNorm = 0.0284, lr_0 = 1.2426e-04
Loss = 1.2742e-03, PNorm = 154.8933, GNorm = 0.0734, lr_0 = 1.2417e-04
Loss = 9.7049e-04, PNorm = 154.8946, GNorm = 0.0696, lr_0 = 1.2409e-04
Loss = 4.3121e-03, PNorm = 154.8928, GNorm = 0.0967, lr_0 = 1.2400e-04
Loss = 1.4203e-03, PNorm = 154.8941, GNorm = 0.1265, lr_0 = 1.2392e-04
Loss = 1.4679e-03, PNorm = 154.8970, GNorm = 0.1058, lr_0 = 1.2383e-04
Loss = 9.3652e-04, PNorm = 154.9002, GNorm = 0.0624, lr_0 = 1.2375e-04
Loss = 6.8176e-04, PNorm = 154.9020, GNorm = 0.0688, lr_0 = 1.2366e-04
Loss = 9.4097e-04, PNorm = 154.9034, GNorm = 0.0616, lr_0 = 1.2358e-04
Loss = 1.4632e-03, PNorm = 154.9036, GNorm = 0.0687, lr_0 = 1.2349e-04
Loss = 9.5884e-04, PNorm = 154.9040, GNorm = 0.1733, lr_0 = 1.2341e-04
Loss = 1.5399e-03, PNorm = 154.9053, GNorm = 0.0668, lr_0 = 1.2332e-04
Loss = 1.9349e-03, PNorm = 154.9083, GNorm = 0.0802, lr_0 = 1.2324e-04
Loss = 3.1237e-03, PNorm = 154.9087, GNorm = 0.0871, lr_0 = 1.2315e-04
Loss = 2.0649e-03, PNorm = 154.9115, GNorm = 0.1194, lr_0 = 1.2307e-04
Loss = 1.6333e-03, PNorm = 154.9126, GNorm = 0.1758, lr_0 = 1.2298e-04
Loss = 7.9844e-04, PNorm = 154.9150, GNorm = 0.0913, lr_0 = 1.2290e-04
Loss = 5.9742e-04, PNorm = 154.9151, GNorm = 0.0552, lr_0 = 1.2282e-04
Loss = 9.7781e-04, PNorm = 154.9185, GNorm = 0.1120, lr_0 = 1.2273e-04
Loss = 6.8712e-04, PNorm = 154.9206, GNorm = 0.0670, lr_0 = 1.2265e-04
Loss = 6.3866e-04, PNorm = 154.9215, GNorm = 0.0273, lr_0 = 1.2256e-04
Loss = 2.5717e-03, PNorm = 154.9217, GNorm = 0.0476, lr_0 = 1.2248e-04
Loss = 1.2835e-03, PNorm = 154.9222, GNorm = 0.0909, lr_0 = 1.2240e-04
Loss = 5.3082e-04, PNorm = 154.9235, GNorm = 0.1094, lr_0 = 1.2231e-04
Loss = 8.5435e-04, PNorm = 154.9244, GNorm = 0.1419, lr_0 = 1.2223e-04
Loss = 7.6878e-04, PNorm = 154.9261, GNorm = 0.1249, lr_0 = 1.2214e-04
Loss = 7.2945e-04, PNorm = 154.9282, GNorm = 0.0469, lr_0 = 1.2206e-04
Loss = 1.6732e-03, PNorm = 154.9294, GNorm = 0.1645, lr_0 = 1.2198e-04
Loss = 1.0883e-03, PNorm = 154.9306, GNorm = 0.0828, lr_0 = 1.2189e-04
Loss = 1.7632e-03, PNorm = 154.9309, GNorm = 0.1506, lr_0 = 1.2181e-04
Loss = 2.5131e-03, PNorm = 154.9334, GNorm = 0.0510, lr_0 = 1.2173e-04
Loss = 5.7555e-04, PNorm = 154.9359, GNorm = 0.0310, lr_0 = 1.2164e-04
Loss = 6.4335e-04, PNorm = 154.9359, GNorm = 0.0449, lr_0 = 1.2156e-04
Loss = 8.7762e-04, PNorm = 154.9382, GNorm = 0.0799, lr_0 = 1.2148e-04
Loss = 5.5139e-04, PNorm = 154.9402, GNorm = 0.0552, lr_0 = 1.2139e-04
Loss = 7.2391e-04, PNorm = 154.9421, GNorm = 0.0493, lr_0 = 1.2131e-04
Loss = 1.9823e-03, PNorm = 154.9436, GNorm = 0.0823, lr_0 = 1.2123e-04
Loss = 1.0350e-03, PNorm = 154.9440, GNorm = 0.0806, lr_0 = 1.2114e-04
Loss = 1.6733e-03, PNorm = 154.9449, GNorm = 0.2504, lr_0 = 1.2106e-04
Loss = 1.0302e-03, PNorm = 154.9473, GNorm = 0.1215, lr_0 = 1.2098e-04
Loss = 1.5583e-03, PNorm = 154.9477, GNorm = 0.1525, lr_0 = 1.2090e-04
Loss = 5.9618e-04, PNorm = 154.9486, GNorm = 0.0323, lr_0 = 1.2081e-04
Loss = 2.0987e-03, PNorm = 154.9493, GNorm = 0.0607, lr_0 = 1.2073e-04
Loss = 1.9057e-03, PNorm = 154.9509, GNorm = 0.0496, lr_0 = 1.2065e-04
Loss = 7.5614e-04, PNorm = 154.9514, GNorm = 0.1174, lr_0 = 1.2056e-04
Loss = 1.6119e-03, PNorm = 154.9518, GNorm = 0.0586, lr_0 = 1.2048e-04
Loss = 7.5378e-04, PNorm = 154.9512, GNorm = 0.0697, lr_0 = 1.2040e-04
Loss = 1.9043e-03, PNorm = 154.9539, GNorm = 0.0638, lr_0 = 1.2032e-04
Loss = 5.5382e-04, PNorm = 154.9556, GNorm = 0.0566, lr_0 = 1.2023e-04
Loss = 6.4998e-04, PNorm = 154.9582, GNorm = 0.0646, lr_0 = 1.2015e-04
Loss = 6.0956e-04, PNorm = 154.9607, GNorm = 0.1300, lr_0 = 1.2007e-04
Loss = 6.4506e-04, PNorm = 154.9620, GNorm = 0.0761, lr_0 = 1.1999e-04
Loss = 7.1603e-04, PNorm = 154.9635, GNorm = 0.0967, lr_0 = 1.1991e-04
Loss = 2.0402e-03, PNorm = 154.9642, GNorm = 0.0843, lr_0 = 1.1982e-04
Loss = 2.0247e-03, PNorm = 154.9661, GNorm = 0.2062, lr_0 = 1.1974e-04
Loss = 8.0198e-04, PNorm = 154.9666, GNorm = 0.1497, lr_0 = 1.1966e-04
Loss = 2.0295e-03, PNorm = 154.9677, GNorm = 0.0635, lr_0 = 1.1958e-04
Loss = 1.0177e-03, PNorm = 154.9696, GNorm = 0.0482, lr_0 = 1.1950e-04
Loss = 1.1850e-03, PNorm = 154.9720, GNorm = 0.0295, lr_0 = 1.1941e-04
Loss = 7.9500e-04, PNorm = 154.9750, GNorm = 0.0209, lr_0 = 1.1933e-04
Loss = 7.9164e-04, PNorm = 154.9763, GNorm = 0.1319, lr_0 = 1.1925e-04
Loss = 9.9511e-04, PNorm = 154.9781, GNorm = 0.0799, lr_0 = 1.1917e-04
Loss = 1.0628e-03, PNorm = 154.9790, GNorm = 0.0326, lr_0 = 1.1909e-04
Loss = 1.5477e-03, PNorm = 154.9805, GNorm = 0.1005, lr_0 = 1.1901e-04
Loss = 9.9143e-04, PNorm = 154.9822, GNorm = 0.1575, lr_0 = 1.1892e-04
Loss = 2.3410e-03, PNorm = 154.9839, GNorm = 0.0866, lr_0 = 1.1884e-04
Loss = 6.0013e-04, PNorm = 154.9858, GNorm = 0.0720, lr_0 = 1.1876e-04
Loss = 1.2892e-03, PNorm = 154.9868, GNorm = 0.0951, lr_0 = 1.1868e-04
Loss = 1.4054e-03, PNorm = 154.9876, GNorm = 0.0899, lr_0 = 1.1860e-04
Loss = 1.2811e-03, PNorm = 154.9894, GNorm = 0.0584, lr_0 = 1.1852e-04
Loss = 9.9926e-04, PNorm = 154.9898, GNorm = 0.0872, lr_0 = 1.1844e-04
Loss = 1.7606e-03, PNorm = 154.9929, GNorm = 0.1421, lr_0 = 1.1835e-04
Loss = 1.5305e-03, PNorm = 154.9950, GNorm = 0.1002, lr_0 = 1.1827e-04
Loss = 2.0858e-03, PNorm = 154.9960, GNorm = 0.2317, lr_0 = 1.1819e-04
Loss = 5.1754e-03, PNorm = 154.9960, GNorm = 0.2449, lr_0 = 1.1811e-04
Loss = 1.0138e-03, PNorm = 155.0001, GNorm = 0.0904, lr_0 = 1.1803e-04
Loss = 8.9589e-04, PNorm = 155.0019, GNorm = 0.0974, lr_0 = 1.1795e-04
Loss = 2.1598e-03, PNorm = 155.0023, GNorm = 0.1710, lr_0 = 1.1787e-04
Validation mae = 0.475471
Epoch 28
Loss = 7.6902e-04, PNorm = 155.0048, GNorm = 0.0647, lr_0 = 1.1779e-04
Loss = 4.0508e-03, PNorm = 155.0051, GNorm = 0.0410, lr_0 = 1.1771e-04
Loss = 5.2613e-04, PNorm = 155.0062, GNorm = 0.0686, lr_0 = 1.1763e-04
Loss = 1.6415e-03, PNorm = 155.0073, GNorm = 0.0261, lr_0 = 1.1755e-04
Loss = 6.9659e-04, PNorm = 155.0085, GNorm = 0.1185, lr_0 = 1.1747e-04
Loss = 5.0806e-04, PNorm = 155.0107, GNorm = 0.0741, lr_0 = 1.1739e-04
Loss = 4.6355e-04, PNorm = 155.0126, GNorm = 0.0808, lr_0 = 1.1730e-04
Loss = 2.1019e-03, PNorm = 155.0145, GNorm = 0.0783, lr_0 = 1.1722e-04
Loss = 5.2081e-04, PNorm = 155.0150, GNorm = 0.0792, lr_0 = 1.1714e-04
Loss = 9.8869e-04, PNorm = 155.0157, GNorm = 0.0911, lr_0 = 1.1706e-04
Loss = 1.2714e-03, PNorm = 155.0168, GNorm = 0.2797, lr_0 = 1.1698e-04
Loss = 7.1454e-04, PNorm = 155.0178, GNorm = 0.0721, lr_0 = 1.1690e-04
Loss = 5.0026e-04, PNorm = 155.0171, GNorm = 0.0487, lr_0 = 1.1682e-04
Loss = 8.3902e-04, PNorm = 155.0177, GNorm = 0.0810, lr_0 = 1.1674e-04
Loss = 9.0482e-04, PNorm = 155.0191, GNorm = 0.3478, lr_0 = 1.1666e-04
Loss = 9.9551e-04, PNorm = 155.0219, GNorm = 0.1751, lr_0 = 1.1658e-04
Loss = 3.1172e-03, PNorm = 155.0215, GNorm = 0.3587, lr_0 = 1.1650e-04
Loss = 1.0250e-03, PNorm = 155.0224, GNorm = 0.1158, lr_0 = 1.1642e-04
Loss = 2.3738e-03, PNorm = 155.0241, GNorm = 0.0614, lr_0 = 1.1634e-04
Loss = 4.5760e-04, PNorm = 155.0256, GNorm = 0.0729, lr_0 = 1.1626e-04
Loss = 4.8339e-04, PNorm = 155.0257, GNorm = 0.0927, lr_0 = 1.1618e-04
Loss = 5.2075e-04, PNorm = 155.0268, GNorm = 0.0573, lr_0 = 1.1611e-04
Loss = 9.3132e-04, PNorm = 155.0275, GNorm = 0.0227, lr_0 = 1.1603e-04
Loss = 4.9078e-04, PNorm = 155.0292, GNorm = 0.0603, lr_0 = 1.1595e-04
Loss = 1.0190e-03, PNorm = 155.0311, GNorm = 0.0825, lr_0 = 1.1587e-04
Loss = 6.5714e-04, PNorm = 155.0327, GNorm = 0.0200, lr_0 = 1.1579e-04
Loss = 7.0584e-04, PNorm = 155.0338, GNorm = 0.0518, lr_0 = 1.1571e-04
Loss = 1.0968e-03, PNorm = 155.0343, GNorm = 0.0331, lr_0 = 1.1563e-04
Loss = 4.9132e-04, PNorm = 155.0358, GNorm = 0.0225, lr_0 = 1.1555e-04
Loss = 6.1976e-04, PNorm = 155.0371, GNorm = 0.0517, lr_0 = 1.1547e-04
Loss = 4.5802e-04, PNorm = 155.0372, GNorm = 0.0516, lr_0 = 1.1539e-04
Loss = 4.3224e-03, PNorm = 155.0367, GNorm = 0.1306, lr_0 = 1.1531e-04
Loss = 1.9189e-03, PNorm = 155.0384, GNorm = 0.1154, lr_0 = 1.1523e-04
Loss = 1.0002e-03, PNorm = 155.0405, GNorm = 0.0525, lr_0 = 1.1515e-04
Loss = 6.4950e-04, PNorm = 155.0421, GNorm = 0.0367, lr_0 = 1.1508e-04
Loss = 4.1109e-04, PNorm = 155.0440, GNorm = 0.0264, lr_0 = 1.1500e-04
Loss = 4.6318e-04, PNorm = 155.0462, GNorm = 0.0235, lr_0 = 1.1492e-04
Loss = 9.0622e-04, PNorm = 155.0467, GNorm = 0.0744, lr_0 = 1.1484e-04
Loss = 8.7718e-04, PNorm = 155.0479, GNorm = 0.1034, lr_0 = 1.1476e-04
Loss = 4.8173e-04, PNorm = 155.0499, GNorm = 0.0499, lr_0 = 1.1468e-04
Loss = 1.8178e-03, PNorm = 155.0515, GNorm = 0.0501, lr_0 = 1.1460e-04
Loss = 7.3203e-04, PNorm = 155.0526, GNorm = 0.1079, lr_0 = 1.1452e-04
Loss = 1.0351e-03, PNorm = 155.0539, GNorm = 0.0493, lr_0 = 1.1445e-04
Loss = 5.7049e-04, PNorm = 155.0559, GNorm = 0.0706, lr_0 = 1.1437e-04
Loss = 5.9899e-04, PNorm = 155.0564, GNorm = 0.0372, lr_0 = 1.1429e-04
Loss = 1.6004e-03, PNorm = 155.0570, GNorm = 0.0536, lr_0 = 1.1421e-04
Loss = 9.8843e-04, PNorm = 155.0572, GNorm = 0.0574, lr_0 = 1.1413e-04
Loss = 4.6027e-04, PNorm = 155.0584, GNorm = 0.0524, lr_0 = 1.1405e-04
Loss = 8.0284e-04, PNorm = 155.0586, GNorm = 0.0576, lr_0 = 1.1398e-04
Loss = 1.0209e-03, PNorm = 155.0598, GNorm = 0.1017, lr_0 = 1.1390e-04
Loss = 1.1569e-03, PNorm = 155.0610, GNorm = 0.0758, lr_0 = 1.1382e-04
Loss = 5.1543e-04, PNorm = 155.0628, GNorm = 0.0355, lr_0 = 1.1374e-04
Loss = 1.5529e-03, PNorm = 155.0626, GNorm = 0.0506, lr_0 = 1.1366e-04
Loss = 1.1800e-03, PNorm = 155.0639, GNorm = 0.1191, lr_0 = 1.1359e-04
Loss = 1.3162e-03, PNorm = 155.0652, GNorm = 0.0925, lr_0 = 1.1351e-04
Loss = 5.8663e-04, PNorm = 155.0659, GNorm = 0.0588, lr_0 = 1.1343e-04
Loss = 4.4255e-04, PNorm = 155.0674, GNorm = 0.0154, lr_0 = 1.1335e-04
Loss = 2.5410e-03, PNorm = 155.0664, GNorm = 0.1711, lr_0 = 1.1328e-04
Loss = 1.3883e-03, PNorm = 155.0682, GNorm = 0.2454, lr_0 = 1.1320e-04
Loss = 1.5893e-03, PNorm = 155.0688, GNorm = 0.1356, lr_0 = 1.1312e-04
Loss = 1.4857e-03, PNorm = 155.0726, GNorm = 0.0935, lr_0 = 1.1304e-04
Loss = 8.4639e-04, PNorm = 155.0756, GNorm = 0.1695, lr_0 = 1.1297e-04
Loss = 2.1987e-03, PNorm = 155.0769, GNorm = 0.1390, lr_0 = 1.1289e-04
Loss = 2.3034e-03, PNorm = 155.0769, GNorm = 0.0637, lr_0 = 1.1281e-04
Loss = 3.4551e-03, PNorm = 155.0776, GNorm = 0.1480, lr_0 = 1.1273e-04
Loss = 1.8308e-03, PNorm = 155.0797, GNorm = 0.1713, lr_0 = 1.1266e-04
Loss = 7.3800e-04, PNorm = 155.0803, GNorm = 0.0860, lr_0 = 1.1258e-04
Loss = 8.8710e-04, PNorm = 155.0818, GNorm = 0.0918, lr_0 = 1.1250e-04
Loss = 1.0823e-03, PNorm = 155.0831, GNorm = 0.0837, lr_0 = 1.1243e-04
Loss = 1.4938e-03, PNorm = 155.0851, GNorm = 0.0436, lr_0 = 1.1235e-04
Loss = 1.2188e-03, PNorm = 155.0857, GNorm = 0.0407, lr_0 = 1.1227e-04
Loss = 1.5130e-03, PNorm = 155.0866, GNorm = 0.0805, lr_0 = 1.1219e-04
Loss = 8.1490e-04, PNorm = 155.0880, GNorm = 0.0848, lr_0 = 1.1212e-04
Loss = 4.7691e-04, PNorm = 155.0884, GNorm = 0.1479, lr_0 = 1.1204e-04
Loss = 1.0524e-03, PNorm = 155.0882, GNorm = 0.0512, lr_0 = 1.1196e-04
Loss = 1.3944e-03, PNorm = 155.0886, GNorm = 0.0499, lr_0 = 1.1189e-04
Loss = 1.2642e-03, PNorm = 155.0915, GNorm = 0.0391, lr_0 = 1.1181e-04
Loss = 8.9536e-04, PNorm = 155.0927, GNorm = 0.1712, lr_0 = 1.1173e-04
Loss = 5.4074e-04, PNorm = 155.0931, GNorm = 0.0421, lr_0 = 1.1166e-04
Loss = 1.0410e-03, PNorm = 155.0945, GNorm = 0.0818, lr_0 = 1.1158e-04
Loss = 1.3612e-03, PNorm = 155.0955, GNorm = 0.0765, lr_0 = 1.1150e-04
Loss = 1.4680e-03, PNorm = 155.0979, GNorm = 0.1820, lr_0 = 1.1143e-04
Loss = 1.2356e-03, PNorm = 155.0999, GNorm = 0.0845, lr_0 = 1.1135e-04
Loss = 1.3422e-03, PNorm = 155.1009, GNorm = 0.0409, lr_0 = 1.1128e-04
Loss = 1.0479e-03, PNorm = 155.1019, GNorm = 0.0303, lr_0 = 1.1120e-04
Loss = 9.3954e-04, PNorm = 155.1021, GNorm = 0.0490, lr_0 = 1.1112e-04
Loss = 7.4793e-04, PNorm = 155.1024, GNorm = 0.0317, lr_0 = 1.1105e-04
Loss = 1.3364e-03, PNorm = 155.1030, GNorm = 0.0704, lr_0 = 1.1097e-04
Loss = 1.5263e-03, PNorm = 155.1048, GNorm = 0.0722, lr_0 = 1.1089e-04
Loss = 5.4936e-04, PNorm = 155.1060, GNorm = 0.0740, lr_0 = 1.1082e-04
Loss = 8.0458e-04, PNorm = 155.1090, GNorm = 0.0493, lr_0 = 1.1074e-04
Loss = 5.9715e-04, PNorm = 155.1107, GNorm = 0.0643, lr_0 = 1.1067e-04
Loss = 1.8269e-03, PNorm = 155.1131, GNorm = 0.0605, lr_0 = 1.1059e-04
Loss = 1.1746e-03, PNorm = 155.1146, GNorm = 0.0692, lr_0 = 1.1052e-04
Loss = 6.0898e-04, PNorm = 155.1151, GNorm = 0.0495, lr_0 = 1.1044e-04
Loss = 1.1436e-03, PNorm = 155.1142, GNorm = 0.1364, lr_0 = 1.1036e-04
Loss = 1.6971e-03, PNorm = 155.1158, GNorm = 0.0804, lr_0 = 1.1029e-04
Loss = 1.5194e-03, PNorm = 155.1164, GNorm = 0.1330, lr_0 = 1.1021e-04
Loss = 1.7102e-03, PNorm = 155.1184, GNorm = 0.0486, lr_0 = 1.1014e-04
Loss = 1.3123e-03, PNorm = 155.1182, GNorm = 0.0356, lr_0 = 1.1006e-04
Loss = 2.7061e-03, PNorm = 155.1194, GNorm = 0.0276, lr_0 = 1.0999e-04
Loss = 1.5057e-03, PNorm = 155.1199, GNorm = 0.2022, lr_0 = 1.0991e-04
Loss = 1.8219e-03, PNorm = 155.1201, GNorm = 0.0319, lr_0 = 1.0984e-04
Loss = 1.8627e-03, PNorm = 155.1215, GNorm = 0.1020, lr_0 = 1.0976e-04
Loss = 1.1057e-03, PNorm = 155.1216, GNorm = 0.0933, lr_0 = 1.0969e-04
Loss = 1.5734e-03, PNorm = 155.1227, GNorm = 0.1347, lr_0 = 1.0961e-04
Loss = 9.5874e-04, PNorm = 155.1237, GNorm = 0.0945, lr_0 = 1.0954e-04
Loss = 8.9601e-04, PNorm = 155.1251, GNorm = 0.1337, lr_0 = 1.0946e-04
Loss = 5.3785e-04, PNorm = 155.1264, GNorm = 0.0368, lr_0 = 1.0939e-04
Loss = 1.9775e-03, PNorm = 155.1288, GNorm = 0.2640, lr_0 = 1.0931e-04
Loss = 7.1040e-04, PNorm = 155.1297, GNorm = 0.0293, lr_0 = 1.0924e-04
Loss = 4.4746e-04, PNorm = 155.1300, GNorm = 0.0986, lr_0 = 1.0916e-04
Loss = 1.4562e-03, PNorm = 155.1326, GNorm = 0.0424, lr_0 = 1.0909e-04
Loss = 1.1165e-03, PNorm = 155.1351, GNorm = 0.1044, lr_0 = 1.0901e-04
Loss = 1.1256e-03, PNorm = 155.1366, GNorm = 0.0863, lr_0 = 1.0894e-04
Loss = 1.2749e-03, PNorm = 155.1360, GNorm = 0.1387, lr_0 = 1.0886e-04
Loss = 1.0535e-03, PNorm = 155.1364, GNorm = 0.1009, lr_0 = 1.0879e-04
Loss = 8.8086e-04, PNorm = 155.1374, GNorm = 0.1189, lr_0 = 1.0871e-04
Loss = 1.4714e-03, PNorm = 155.1395, GNorm = 0.1611, lr_0 = 1.0864e-04
Loss = 5.0721e-04, PNorm = 155.1418, GNorm = 0.0548, lr_0 = 1.0856e-04
Validation mae = 0.475863
Epoch 29
Loss = 1.3288e-03, PNorm = 155.1436, GNorm = 0.0243, lr_0 = 1.0849e-04
Loss = 1.0989e-03, PNorm = 155.1452, GNorm = 0.1432, lr_0 = 1.0841e-04
Loss = 1.2129e-03, PNorm = 155.1467, GNorm = 0.3154, lr_0 = 1.0834e-04
Loss = 1.1653e-03, PNorm = 155.1479, GNorm = 0.2347, lr_0 = 1.0827e-04
Loss = 1.4246e-03, PNorm = 155.1498, GNorm = 0.0824, lr_0 = 1.0819e-04
Loss = 6.2795e-04, PNorm = 155.1506, GNorm = 0.1349, lr_0 = 1.0812e-04
Loss = 7.4982e-04, PNorm = 155.1522, GNorm = 0.0871, lr_0 = 1.0804e-04
Loss = 5.4299e-04, PNorm = 155.1521, GNorm = 0.0466, lr_0 = 1.0797e-04
Loss = 7.5271e-04, PNorm = 155.1532, GNorm = 0.0464, lr_0 = 1.0790e-04
Loss = 8.2334e-04, PNorm = 155.1545, GNorm = 0.1523, lr_0 = 1.0782e-04
Loss = 8.4870e-04, PNorm = 155.1567, GNorm = 0.0732, lr_0 = 1.0775e-04
Loss = 5.4790e-04, PNorm = 155.1573, GNorm = 0.2298, lr_0 = 1.0767e-04
Loss = 5.1972e-04, PNorm = 155.1569, GNorm = 0.0391, lr_0 = 1.0760e-04
Loss = 1.3552e-03, PNorm = 155.1577, GNorm = 0.0340, lr_0 = 1.0753e-04
Loss = 6.0240e-04, PNorm = 155.1596, GNorm = 0.0456, lr_0 = 1.0745e-04
Loss = 4.6394e-04, PNorm = 155.1617, GNorm = 0.0282, lr_0 = 1.0738e-04
Loss = 8.6246e-04, PNorm = 155.1637, GNorm = 0.0882, lr_0 = 1.0731e-04
Loss = 1.0772e-03, PNorm = 155.1653, GNorm = 0.0567, lr_0 = 1.0723e-04
Loss = 1.2572e-03, PNorm = 155.1665, GNorm = 0.0712, lr_0 = 1.0716e-04
Loss = 8.0992e-04, PNorm = 155.1675, GNorm = 0.0763, lr_0 = 1.0709e-04
Loss = 1.5083e-03, PNorm = 155.1685, GNorm = 0.1533, lr_0 = 1.0701e-04
Loss = 4.4230e-04, PNorm = 155.1696, GNorm = 0.0462, lr_0 = 1.0694e-04
Loss = 3.9881e-04, PNorm = 155.1710, GNorm = 0.1139, lr_0 = 1.0687e-04
Loss = 4.3088e-04, PNorm = 155.1717, GNorm = 0.0951, lr_0 = 1.0679e-04
Loss = 8.3865e-04, PNorm = 155.1722, GNorm = 0.0423, lr_0 = 1.0672e-04
Loss = 1.4651e-03, PNorm = 155.1721, GNorm = 0.1172, lr_0 = 1.0665e-04
Loss = 1.1864e-03, PNorm = 155.1724, GNorm = 0.2101, lr_0 = 1.0657e-04
Loss = 5.3086e-04, PNorm = 155.1729, GNorm = 0.0983, lr_0 = 1.0650e-04
Loss = 6.5983e-04, PNorm = 155.1745, GNorm = 0.0246, lr_0 = 1.0643e-04
Loss = 1.6723e-03, PNorm = 155.1764, GNorm = 0.2055, lr_0 = 1.0635e-04
Loss = 7.3040e-04, PNorm = 155.1767, GNorm = 0.0281, lr_0 = 1.0628e-04
Loss = 5.5597e-04, PNorm = 155.1771, GNorm = 0.0410, lr_0 = 1.0621e-04
Loss = 1.6702e-03, PNorm = 155.1764, GNorm = 0.1378, lr_0 = 1.0614e-04
Loss = 8.6245e-04, PNorm = 155.1771, GNorm = 0.1380, lr_0 = 1.0606e-04
Loss = 1.9893e-03, PNorm = 155.1783, GNorm = 0.0577, lr_0 = 1.0599e-04
Loss = 1.9202e-03, PNorm = 155.1808, GNorm = 0.1373, lr_0 = 1.0592e-04
Loss = 1.9813e-03, PNorm = 155.1815, GNorm = 0.0562, lr_0 = 1.0585e-04
Loss = 3.1116e-03, PNorm = 155.1829, GNorm = 0.0688, lr_0 = 1.0577e-04
Loss = 4.3803e-04, PNorm = 155.1840, GNorm = 0.0773, lr_0 = 1.0570e-04
Loss = 7.3732e-04, PNorm = 155.1863, GNorm = 0.0622, lr_0 = 1.0563e-04
Loss = 1.0734e-03, PNorm = 155.1877, GNorm = 0.0711, lr_0 = 1.0556e-04
Loss = 8.6164e-04, PNorm = 155.1892, GNorm = 0.0672, lr_0 = 1.0548e-04
Loss = 4.8254e-04, PNorm = 155.1909, GNorm = 0.0225, lr_0 = 1.0541e-04
Loss = 8.8519e-04, PNorm = 155.1925, GNorm = 0.0924, lr_0 = 1.0534e-04
Loss = 7.0918e-04, PNorm = 155.1935, GNorm = 0.0527, lr_0 = 1.0527e-04
Loss = 1.5608e-03, PNorm = 155.1953, GNorm = 0.0258, lr_0 = 1.0519e-04
Loss = 6.1277e-04, PNorm = 155.1969, GNorm = 0.0267, lr_0 = 1.0512e-04
Loss = 8.6154e-04, PNorm = 155.1976, GNorm = 0.1283, lr_0 = 1.0505e-04
Loss = 1.9401e-03, PNorm = 155.1993, GNorm = 0.0897, lr_0 = 1.0498e-04
Loss = 3.7141e-03, PNorm = 155.2010, GNorm = 0.0398, lr_0 = 1.0491e-04
Loss = 8.8231e-04, PNorm = 155.2024, GNorm = 0.2564, lr_0 = 1.0483e-04
Loss = 1.0843e-03, PNorm = 155.2013, GNorm = 0.0499, lr_0 = 1.0476e-04
Loss = 6.8243e-04, PNorm = 155.2010, GNorm = 0.2325, lr_0 = 1.0469e-04
Loss = 8.7064e-04, PNorm = 155.2023, GNorm = 0.0458, lr_0 = 1.0462e-04
Loss = 4.6264e-04, PNorm = 155.2035, GNorm = 0.0444, lr_0 = 1.0455e-04
Loss = 1.4001e-03, PNorm = 155.2031, GNorm = 0.1193, lr_0 = 1.0448e-04
Loss = 8.1743e-04, PNorm = 155.2033, GNorm = 0.0766, lr_0 = 1.0440e-04
Loss = 1.5978e-03, PNorm = 155.2043, GNorm = 0.0609, lr_0 = 1.0433e-04
Loss = 1.2621e-03, PNorm = 155.2053, GNorm = 0.1280, lr_0 = 1.0426e-04
Loss = 3.2843e-03, PNorm = 155.2057, GNorm = 0.0650, lr_0 = 1.0419e-04
Loss = 3.7715e-03, PNorm = 155.2055, GNorm = 0.1881, lr_0 = 1.0412e-04
Loss = 1.3660e-03, PNorm = 155.2063, GNorm = 0.0680, lr_0 = 1.0405e-04
Loss = 1.6574e-03, PNorm = 155.2072, GNorm = 0.0534, lr_0 = 1.0398e-04
Loss = 7.8294e-04, PNorm = 155.2084, GNorm = 0.0263, lr_0 = 1.0391e-04
Loss = 6.9188e-04, PNorm = 155.2098, GNorm = 0.0471, lr_0 = 1.0383e-04
Loss = 1.0921e-03, PNorm = 155.2119, GNorm = 0.1448, lr_0 = 1.0376e-04
Loss = 3.7886e-04, PNorm = 155.2137, GNorm = 0.0810, lr_0 = 1.0369e-04
Loss = 7.8525e-04, PNorm = 155.2145, GNorm = 0.0771, lr_0 = 1.0362e-04
Loss = 3.7802e-04, PNorm = 155.2152, GNorm = 0.0492, lr_0 = 1.0355e-04
Loss = 4.9220e-04, PNorm = 155.2175, GNorm = 0.1669, lr_0 = 1.0348e-04
Loss = 3.6352e-03, PNorm = 155.2197, GNorm = 0.1004, lr_0 = 1.0341e-04
Loss = 9.4166e-04, PNorm = 155.2205, GNorm = 0.1120, lr_0 = 1.0334e-04
Loss = 1.1640e-03, PNorm = 155.2181, GNorm = 0.0294, lr_0 = 1.0327e-04
Loss = 1.6410e-03, PNorm = 155.2187, GNorm = 0.0303, lr_0 = 1.0320e-04
Loss = 5.2593e-04, PNorm = 155.2213, GNorm = 0.0247, lr_0 = 1.0312e-04
Loss = 7.6457e-04, PNorm = 155.2234, GNorm = 0.0280, lr_0 = 1.0305e-04
Loss = 3.7811e-04, PNorm = 155.2247, GNorm = 0.0578, lr_0 = 1.0298e-04
Loss = 7.7165e-04, PNorm = 155.2252, GNorm = 0.1161, lr_0 = 1.0291e-04
Loss = 1.4387e-03, PNorm = 155.2271, GNorm = 0.0483, lr_0 = 1.0284e-04
Loss = 1.0347e-03, PNorm = 155.2279, GNorm = 0.1464, lr_0 = 1.0277e-04
Loss = 1.0680e-03, PNorm = 155.2299, GNorm = 0.1643, lr_0 = 1.0270e-04
Loss = 1.2576e-03, PNorm = 155.2303, GNorm = 0.2069, lr_0 = 1.0263e-04
Loss = 1.4403e-03, PNorm = 155.2307, GNorm = 0.0719, lr_0 = 1.0256e-04
Loss = 1.3998e-03, PNorm = 155.2299, GNorm = 0.0297, lr_0 = 1.0249e-04
Loss = 1.5048e-03, PNorm = 155.2301, GNorm = 0.0838, lr_0 = 1.0242e-04
Loss = 5.4028e-04, PNorm = 155.2305, GNorm = 0.0205, lr_0 = 1.0235e-04
Loss = 1.1094e-03, PNorm = 155.2321, GNorm = 0.1027, lr_0 = 1.0228e-04
Loss = 3.8395e-04, PNorm = 155.2338, GNorm = 0.0745, lr_0 = 1.0221e-04
Loss = 9.1548e-04, PNorm = 155.2360, GNorm = 0.0351, lr_0 = 1.0214e-04
Loss = 9.1583e-04, PNorm = 155.2381, GNorm = 0.0546, lr_0 = 1.0207e-04
Loss = 6.9715e-04, PNorm = 155.2392, GNorm = 0.0313, lr_0 = 1.0200e-04
Loss = 1.0343e-03, PNorm = 155.2410, GNorm = 0.0311, lr_0 = 1.0193e-04
Loss = 5.6098e-04, PNorm = 155.2420, GNorm = 0.0905, lr_0 = 1.0186e-04
Loss = 7.8160e-04, PNorm = 155.2431, GNorm = 0.0525, lr_0 = 1.0179e-04
Loss = 5.4408e-04, PNorm = 155.2439, GNorm = 0.0364, lr_0 = 1.0172e-04
Loss = 9.5961e-04, PNorm = 155.2459, GNorm = 0.0822, lr_0 = 1.0165e-04
Loss = 7.9037e-04, PNorm = 155.2465, GNorm = 0.0979, lr_0 = 1.0158e-04
Loss = 1.1172e-03, PNorm = 155.2470, GNorm = 0.0708, lr_0 = 1.0151e-04
Loss = 6.4031e-04, PNorm = 155.2491, GNorm = 0.1147, lr_0 = 1.0144e-04
Loss = 2.3611e-03, PNorm = 155.2500, GNorm = 0.0759, lr_0 = 1.0137e-04
Loss = 4.3220e-04, PNorm = 155.2518, GNorm = 0.0580, lr_0 = 1.0130e-04
Loss = 1.3106e-03, PNorm = 155.2517, GNorm = 0.2255, lr_0 = 1.0123e-04
Loss = 2.8004e-03, PNorm = 155.2516, GNorm = 0.0728, lr_0 = 1.0116e-04
Loss = 9.6326e-04, PNorm = 155.2532, GNorm = 0.0904, lr_0 = 1.0110e-04
Loss = 6.9173e-04, PNorm = 155.2544, GNorm = 0.0921, lr_0 = 1.0103e-04
Loss = 5.1437e-04, PNorm = 155.2571, GNorm = 0.0817, lr_0 = 1.0096e-04
Loss = 9.8606e-04, PNorm = 155.2599, GNorm = 0.0366, lr_0 = 1.0089e-04
Loss = 5.1527e-04, PNorm = 155.2628, GNorm = 0.1056, lr_0 = 1.0082e-04
Loss = 9.3791e-04, PNorm = 155.2630, GNorm = 0.1411, lr_0 = 1.0075e-04
Loss = 9.2503e-04, PNorm = 155.2641, GNorm = 0.0310, lr_0 = 1.0068e-04
Loss = 1.6688e-03, PNorm = 155.2655, GNorm = 0.0903, lr_0 = 1.0061e-04
Loss = 1.0692e-03, PNorm = 155.2673, GNorm = 0.0914, lr_0 = 1.0054e-04
Loss = 1.3058e-03, PNorm = 155.2679, GNorm = 0.0360, lr_0 = 1.0047e-04
Loss = 8.7565e-04, PNorm = 155.2683, GNorm = 0.0493, lr_0 = 1.0041e-04
Loss = 1.0281e-03, PNorm = 155.2689, GNorm = 0.1448, lr_0 = 1.0034e-04
Loss = 1.4690e-03, PNorm = 155.2706, GNorm = 0.0567, lr_0 = 1.0027e-04
Loss = 9.6331e-04, PNorm = 155.2712, GNorm = 0.0951, lr_0 = 1.0020e-04
Loss = 1.1839e-03, PNorm = 155.2711, GNorm = 0.1338, lr_0 = 1.0013e-04
Loss = 1.7034e-03, PNorm = 155.2733, GNorm = 0.0627, lr_0 = 1.0006e-04
Loss = 1.0075e-03, PNorm = 155.2737, GNorm = 0.1734, lr_0 = 1.0000e-04
Validation mae = 0.476328
Model 0 best validation mae = 0.475457 on epoch 26
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.454501
Ensemble test mae = 0.454501
Fold 6
Splitting data with seed 6
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 9.4261e-01, PNorm = 62.2434, GNorm = 2.3193, lr_0 = 1.0413e-04
Loss = 6.9274e-01, PNorm = 62.2535, GNorm = 1.9536, lr_0 = 1.0788e-04
Loss = 6.2003e-01, PNorm = 62.2652, GNorm = 1.7971, lr_0 = 1.1163e-04
Loss = 5.0095e-01, PNorm = 62.2777, GNorm = 2.0257, lr_0 = 1.1537e-04
Loss = 5.3090e-01, PNorm = 62.2873, GNorm = 1.5500, lr_0 = 1.1913e-04
Loss = 4.1844e-01, PNorm = 62.2960, GNorm = 1.9344, lr_0 = 1.2287e-04
Loss = 4.0005e-01, PNorm = 62.3042, GNorm = 2.0608, lr_0 = 1.2663e-04
Loss = 3.7885e-01, PNorm = 62.3134, GNorm = 3.3637, lr_0 = 1.3038e-04
Loss = 4.1534e-01, PNorm = 62.3232, GNorm = 2.1434, lr_0 = 1.3413e-04
Loss = 3.5725e-01, PNorm = 62.3317, GNorm = 2.9745, lr_0 = 1.3788e-04
Loss = 3.5528e-01, PNorm = 62.3402, GNorm = 2.8016, lr_0 = 1.4163e-04
Loss = 3.5225e-01, PNorm = 62.3512, GNorm = 2.4284, lr_0 = 1.4537e-04
Loss = 3.3404e-01, PNorm = 62.3613, GNorm = 2.0194, lr_0 = 1.4913e-04
Loss = 3.5862e-01, PNorm = 62.3715, GNorm = 1.9621, lr_0 = 1.5288e-04
Loss = 3.4020e-01, PNorm = 62.3824, GNorm = 2.1371, lr_0 = 1.5662e-04
Loss = 2.9072e-01, PNorm = 62.3923, GNorm = 1.2976, lr_0 = 1.6038e-04
Loss = 3.1981e-01, PNorm = 62.4041, GNorm = 1.9830, lr_0 = 1.6412e-04
Loss = 3.4759e-01, PNorm = 62.4157, GNorm = 1.8827, lr_0 = 1.6788e-04
Loss = 3.5138e-01, PNorm = 62.4303, GNorm = 1.7316, lr_0 = 1.7163e-04
Loss = 3.2553e-01, PNorm = 62.4448, GNorm = 1.8229, lr_0 = 1.7538e-04
Loss = 2.8799e-01, PNorm = 62.4569, GNorm = 1.3437, lr_0 = 1.7913e-04
Loss = 3.6262e-01, PNorm = 62.4700, GNorm = 2.6817, lr_0 = 1.8288e-04
Loss = 3.3555e-01, PNorm = 62.4824, GNorm = 1.6898, lr_0 = 1.8662e-04
Loss = 3.0932e-01, PNorm = 62.4974, GNorm = 2.3894, lr_0 = 1.9038e-04
Loss = 3.3741e-01, PNorm = 62.5111, GNorm = 3.1675, lr_0 = 1.9413e-04
Loss = 2.6857e-01, PNorm = 62.5249, GNorm = 1.3701, lr_0 = 1.9788e-04
Loss = 2.6194e-01, PNorm = 62.5386, GNorm = 1.2123, lr_0 = 2.0163e-04
Loss = 2.8731e-01, PNorm = 62.5519, GNorm = 2.4600, lr_0 = 2.0537e-04
Loss = 3.1646e-01, PNorm = 62.5663, GNorm = 1.7241, lr_0 = 2.0913e-04
Loss = 2.4152e-01, PNorm = 62.5804, GNorm = 1.5976, lr_0 = 2.1288e-04
Loss = 2.9435e-01, PNorm = 62.5952, GNorm = 1.6050, lr_0 = 2.1663e-04
Loss = 2.8566e-01, PNorm = 62.6126, GNorm = 1.3891, lr_0 = 2.2038e-04
Loss = 2.9518e-01, PNorm = 62.6324, GNorm = 1.7948, lr_0 = 2.2412e-04
Loss = 2.5292e-01, PNorm = 62.6479, GNorm = 1.5307, lr_0 = 2.2787e-04
Loss = 2.5727e-01, PNorm = 62.6663, GNorm = 2.5544, lr_0 = 2.3163e-04
Loss = 2.9595e-01, PNorm = 62.6820, GNorm = 1.8485, lr_0 = 2.3538e-04
Loss = 2.4552e-01, PNorm = 62.7009, GNorm = 1.2625, lr_0 = 2.3913e-04
Loss = 2.7889e-01, PNorm = 62.7189, GNorm = 1.1781, lr_0 = 2.4288e-04
Loss = 2.7495e-01, PNorm = 62.7353, GNorm = 2.1227, lr_0 = 2.4662e-04
Loss = 3.0116e-01, PNorm = 62.7554, GNorm = 1.2416, lr_0 = 2.5038e-04
Loss = 2.6976e-01, PNorm = 62.7762, GNorm = 1.3161, lr_0 = 2.5413e-04
Loss = 2.3832e-01, PNorm = 62.7953, GNorm = 1.3131, lr_0 = 2.5788e-04
Loss = 2.4482e-01, PNorm = 62.8141, GNorm = 1.9920, lr_0 = 2.6163e-04
Loss = 2.7087e-01, PNorm = 62.8345, GNorm = 2.1709, lr_0 = 2.6537e-04
Loss = 2.5786e-01, PNorm = 62.8547, GNorm = 1.0994, lr_0 = 2.6912e-04
Loss = 2.3474e-01, PNorm = 62.8766, GNorm = 1.3199, lr_0 = 2.7288e-04
Loss = 2.7597e-01, PNorm = 62.8963, GNorm = 1.3940, lr_0 = 2.7663e-04
Loss = 2.2428e-01, PNorm = 62.9186, GNorm = 1.6595, lr_0 = 2.8038e-04
Loss = 2.7393e-01, PNorm = 62.9392, GNorm = 1.3235, lr_0 = 2.8413e-04
Loss = 2.6024e-01, PNorm = 62.9617, GNorm = 1.2884, lr_0 = 2.8787e-04
Loss = 2.9417e-01, PNorm = 62.9857, GNorm = 1.7257, lr_0 = 2.9163e-04
Loss = 2.6367e-01, PNorm = 63.0118, GNorm = 1.5319, lr_0 = 2.9538e-04
Loss = 2.6360e-01, PNorm = 63.0403, GNorm = 1.3961, lr_0 = 2.9913e-04
Loss = 2.4054e-01, PNorm = 63.0656, GNorm = 1.3912, lr_0 = 3.0288e-04
Loss = 2.3862e-01, PNorm = 63.0914, GNorm = 1.3295, lr_0 = 3.0662e-04
Loss = 2.5598e-01, PNorm = 63.1178, GNorm = 1.3182, lr_0 = 3.1037e-04
Loss = 2.8415e-01, PNorm = 63.1400, GNorm = 1.3382, lr_0 = 3.1413e-04
Loss = 2.8719e-01, PNorm = 63.1697, GNorm = 1.0423, lr_0 = 3.1788e-04
Loss = 2.8823e-01, PNorm = 63.2011, GNorm = 1.4675, lr_0 = 3.2163e-04
Loss = 2.7205e-01, PNorm = 63.2308, GNorm = 1.7507, lr_0 = 3.2538e-04
Loss = 2.7350e-01, PNorm = 63.2638, GNorm = 1.4170, lr_0 = 3.2912e-04
Loss = 2.6278e-01, PNorm = 63.2918, GNorm = 0.9868, lr_0 = 3.3288e-04
Loss = 2.6104e-01, PNorm = 63.3235, GNorm = 1.2936, lr_0 = 3.3663e-04
Loss = 2.3595e-01, PNorm = 63.3508, GNorm = 1.7792, lr_0 = 3.4038e-04
Loss = 2.4412e-01, PNorm = 63.3787, GNorm = 1.3406, lr_0 = 3.4413e-04
Loss = 2.3700e-01, PNorm = 63.4135, GNorm = 1.1819, lr_0 = 3.4787e-04
Loss = 2.2978e-01, PNorm = 63.4383, GNorm = 0.9775, lr_0 = 3.5162e-04
Loss = 2.4173e-01, PNorm = 63.4707, GNorm = 0.9952, lr_0 = 3.5538e-04
Loss = 2.2654e-01, PNorm = 63.5047, GNorm = 1.3643, lr_0 = 3.5913e-04
Loss = 2.9663e-01, PNorm = 63.5362, GNorm = 1.2216, lr_0 = 3.6288e-04
Loss = 2.1213e-01, PNorm = 63.5709, GNorm = 1.2803, lr_0 = 3.6662e-04
Loss = 2.6113e-01, PNorm = 63.6012, GNorm = 1.4162, lr_0 = 3.7037e-04
Loss = 2.2535e-01, PNorm = 63.6407, GNorm = 1.4108, lr_0 = 3.7413e-04
Loss = 2.4133e-01, PNorm = 63.6669, GNorm = 1.1067, lr_0 = 3.7788e-04
Loss = 2.2743e-01, PNorm = 63.6982, GNorm = 0.9743, lr_0 = 3.8163e-04
Loss = 2.5597e-01, PNorm = 63.7338, GNorm = 1.4497, lr_0 = 3.8537e-04
Loss = 2.3815e-01, PNorm = 63.7643, GNorm = 1.2116, lr_0 = 3.8912e-04
Loss = 2.2694e-01, PNorm = 63.8014, GNorm = 1.1184, lr_0 = 3.9287e-04
Loss = 2.1408e-01, PNorm = 63.8373, GNorm = 1.1732, lr_0 = 3.9663e-04
Loss = 2.3582e-01, PNorm = 63.8735, GNorm = 1.2720, lr_0 = 4.0038e-04
Loss = 2.1407e-01, PNorm = 63.9140, GNorm = 0.7994, lr_0 = 4.0413e-04
Loss = 2.2427e-01, PNorm = 63.9534, GNorm = 0.8487, lr_0 = 4.0787e-04
Loss = 2.3086e-01, PNorm = 63.9880, GNorm = 1.1023, lr_0 = 4.1162e-04
Loss = 2.3048e-01, PNorm = 64.0228, GNorm = 1.1326, lr_0 = 4.1537e-04
Loss = 2.4601e-01, PNorm = 64.0610, GNorm = 1.6893, lr_0 = 4.1913e-04
Loss = 2.6855e-01, PNorm = 64.0999, GNorm = 1.2429, lr_0 = 4.2288e-04
Loss = 2.2907e-01, PNorm = 64.1413, GNorm = 1.0588, lr_0 = 4.2662e-04
Loss = 2.7261e-01, PNorm = 64.1837, GNorm = 0.9412, lr_0 = 4.3037e-04
Loss = 2.3901e-01, PNorm = 64.2212, GNorm = 1.6930, lr_0 = 4.3412e-04
Loss = 2.6518e-01, PNorm = 64.2711, GNorm = 1.5129, lr_0 = 4.3788e-04
Loss = 2.2949e-01, PNorm = 64.3103, GNorm = 0.9281, lr_0 = 4.4163e-04
Loss = 2.5465e-01, PNorm = 64.3541, GNorm = 1.0865, lr_0 = 4.4538e-04
Loss = 2.5137e-01, PNorm = 64.3955, GNorm = 1.2653, lr_0 = 4.4912e-04
Loss = 2.2191e-01, PNorm = 64.4391, GNorm = 1.1753, lr_0 = 4.5287e-04
Loss = 2.6807e-01, PNorm = 64.4825, GNorm = 1.0722, lr_0 = 4.5662e-04
Loss = 2.2069e-01, PNorm = 64.5316, GNorm = 1.8594, lr_0 = 4.6038e-04
Loss = 2.2290e-01, PNorm = 64.5777, GNorm = 1.2486, lr_0 = 4.6413e-04
Loss = 2.3662e-01, PNorm = 64.6184, GNorm = 1.1500, lr_0 = 4.6787e-04
Loss = 2.3434e-01, PNorm = 64.6652, GNorm = 0.8986, lr_0 = 4.7162e-04
Loss = 2.4136e-01, PNorm = 64.7068, GNorm = 0.9876, lr_0 = 4.7537e-04
Loss = 2.0059e-01, PNorm = 64.7560, GNorm = 0.8398, lr_0 = 4.7913e-04
Loss = 2.2779e-01, PNorm = 64.7961, GNorm = 0.9253, lr_0 = 4.8288e-04
Loss = 2.0217e-01, PNorm = 64.8412, GNorm = 0.8967, lr_0 = 4.8663e-04
Loss = 2.1760e-01, PNorm = 64.8857, GNorm = 1.0247, lr_0 = 4.9038e-04
Loss = 2.0772e-01, PNorm = 64.9321, GNorm = 0.7580, lr_0 = 4.9412e-04
Loss = 2.3764e-01, PNorm = 64.9766, GNorm = 0.9762, lr_0 = 4.9788e-04
Loss = 2.2401e-01, PNorm = 65.0272, GNorm = 1.3361, lr_0 = 5.0163e-04
Loss = 2.5805e-01, PNorm = 65.0751, GNorm = 1.2803, lr_0 = 5.0538e-04
Loss = 2.1791e-01, PNorm = 65.1309, GNorm = 1.0832, lr_0 = 5.0913e-04
Loss = 1.9351e-01, PNorm = 65.1819, GNorm = 1.1659, lr_0 = 5.1287e-04
Loss = 2.1110e-01, PNorm = 65.2288, GNorm = 0.9282, lr_0 = 5.1663e-04
Loss = 2.0705e-01, PNorm = 65.2730, GNorm = 0.9082, lr_0 = 5.2038e-04
Loss = 2.1834e-01, PNorm = 65.3220, GNorm = 0.9558, lr_0 = 5.2413e-04
Loss = 1.9524e-01, PNorm = 65.3628, GNorm = 1.1154, lr_0 = 5.2788e-04
Loss = 2.1234e-01, PNorm = 65.4119, GNorm = 0.8817, lr_0 = 5.3162e-04
Loss = 2.2786e-01, PNorm = 65.4547, GNorm = 1.1446, lr_0 = 5.3538e-04
Loss = 2.3199e-01, PNorm = 65.5036, GNorm = 0.8628, lr_0 = 5.3912e-04
Loss = 2.1978e-01, PNorm = 65.5598, GNorm = 1.3134, lr_0 = 5.4288e-04
Loss = 2.1119e-01, PNorm = 65.6110, GNorm = 1.1090, lr_0 = 5.4663e-04
Loss = 2.1885e-01, PNorm = 65.6659, GNorm = 0.7379, lr_0 = 5.5038e-04
Validation mae = 0.560044
Epoch 1
Loss = 1.6804e-01, PNorm = 65.7227, GNorm = 0.7450, lr_0 = 5.5413e-04
Loss = 1.5668e-01, PNorm = 65.7785, GNorm = 1.0026, lr_0 = 5.5787e-04
Loss = 1.4451e-01, PNorm = 65.8320, GNorm = 0.7847, lr_0 = 5.6163e-04
Loss = 1.5460e-01, PNorm = 65.8872, GNorm = 0.9221, lr_0 = 5.6538e-04
Loss = 1.4937e-01, PNorm = 65.9466, GNorm = 0.8759, lr_0 = 5.6913e-04
Loss = 1.5166e-01, PNorm = 66.0005, GNorm = 0.7635, lr_0 = 5.7288e-04
Loss = 1.7124e-01, PNorm = 66.0636, GNorm = 0.8010, lr_0 = 5.7662e-04
Loss = 1.4272e-01, PNorm = 66.1228, GNorm = 0.7390, lr_0 = 5.8038e-04
Loss = 1.4290e-01, PNorm = 66.1811, GNorm = 0.7572, lr_0 = 5.8413e-04
Loss = 1.3937e-01, PNorm = 66.2396, GNorm = 0.9564, lr_0 = 5.8788e-04
Loss = 1.3832e-01, PNorm = 66.2910, GNorm = 1.2011, lr_0 = 5.9163e-04
Loss = 1.5963e-01, PNorm = 66.3468, GNorm = 0.9215, lr_0 = 5.9538e-04
Loss = 1.7130e-01, PNorm = 66.4106, GNorm = 1.1092, lr_0 = 5.9913e-04
Loss = 1.6652e-01, PNorm = 66.4769, GNorm = 1.0772, lr_0 = 6.0288e-04
Loss = 1.3276e-01, PNorm = 66.5456, GNorm = 0.7868, lr_0 = 6.0663e-04
Loss = 1.2796e-01, PNorm = 66.6016, GNorm = 0.7754, lr_0 = 6.1038e-04
Loss = 1.5088e-01, PNorm = 66.6648, GNorm = 0.7966, lr_0 = 6.1413e-04
Loss = 1.4179e-01, PNorm = 66.7244, GNorm = 0.8049, lr_0 = 6.1788e-04
Loss = 1.3170e-01, PNorm = 66.7860, GNorm = 0.7202, lr_0 = 6.2163e-04
Loss = 1.4906e-01, PNorm = 66.8501, GNorm = 0.7786, lr_0 = 6.2538e-04
Loss = 1.4391e-01, PNorm = 66.9187, GNorm = 0.8345, lr_0 = 6.2913e-04
Loss = 1.4548e-01, PNorm = 66.9889, GNorm = 0.7233, lr_0 = 6.3288e-04
Loss = 1.4051e-01, PNorm = 67.0553, GNorm = 1.0071, lr_0 = 6.3663e-04
Loss = 1.7124e-01, PNorm = 67.1205, GNorm = 1.2363, lr_0 = 6.4038e-04
Loss = 1.5198e-01, PNorm = 67.1910, GNorm = 0.7960, lr_0 = 6.4413e-04
Loss = 1.4117e-01, PNorm = 67.2689, GNorm = 0.9539, lr_0 = 6.4788e-04
Loss = 1.4680e-01, PNorm = 67.3379, GNorm = 0.7448, lr_0 = 6.5163e-04
Loss = 1.4724e-01, PNorm = 67.4120, GNorm = 0.8506, lr_0 = 6.5538e-04
Loss = 1.5290e-01, PNorm = 67.4800, GNorm = 0.5930, lr_0 = 6.5913e-04
Loss = 1.5898e-01, PNorm = 67.5580, GNorm = 1.0051, lr_0 = 6.6288e-04
Loss = 1.4118e-01, PNorm = 67.6358, GNorm = 0.8737, lr_0 = 6.6663e-04
Loss = 1.6004e-01, PNorm = 67.7165, GNorm = 0.7734, lr_0 = 6.7038e-04
Loss = 1.7543e-01, PNorm = 67.8096, GNorm = 0.8452, lr_0 = 6.7413e-04
Loss = 1.6095e-01, PNorm = 67.8962, GNorm = 0.9193, lr_0 = 6.7788e-04
Loss = 1.6501e-01, PNorm = 67.9861, GNorm = 0.9078, lr_0 = 6.8163e-04
Loss = 1.6879e-01, PNorm = 68.0818, GNorm = 0.9009, lr_0 = 6.8538e-04
Loss = 1.6539e-01, PNorm = 68.1747, GNorm = 0.8447, lr_0 = 6.8913e-04
Loss = 1.7268e-01, PNorm = 68.2740, GNorm = 0.7919, lr_0 = 6.9288e-04
Loss = 1.7019e-01, PNorm = 68.3684, GNorm = 0.8159, lr_0 = 6.9663e-04
Loss = 1.8192e-01, PNorm = 68.4557, GNorm = 0.7706, lr_0 = 7.0038e-04
Loss = 1.7872e-01, PNorm = 68.5403, GNorm = 0.7807, lr_0 = 7.0413e-04
Loss = 1.5924e-01, PNorm = 68.6382, GNorm = 0.8648, lr_0 = 7.0788e-04
Loss = 1.5872e-01, PNorm = 68.7200, GNorm = 0.8400, lr_0 = 7.1163e-04
Loss = 1.6066e-01, PNorm = 68.8110, GNorm = 0.8417, lr_0 = 7.1538e-04
Loss = 1.4651e-01, PNorm = 68.8967, GNorm = 0.8026, lr_0 = 7.1913e-04
Loss = 1.6129e-01, PNorm = 68.9888, GNorm = 0.8423, lr_0 = 7.2288e-04
Loss = 1.6506e-01, PNorm = 69.0771, GNorm = 0.9935, lr_0 = 7.2663e-04
Loss = 1.5296e-01, PNorm = 69.1737, GNorm = 0.5993, lr_0 = 7.3038e-04
Loss = 1.8404e-01, PNorm = 69.2669, GNorm = 0.7618, lr_0 = 7.3413e-04
Loss = 1.5524e-01, PNorm = 69.3687, GNorm = 0.8412, lr_0 = 7.3788e-04
Loss = 1.5551e-01, PNorm = 69.4610, GNorm = 0.6359, lr_0 = 7.4163e-04
Loss = 1.6148e-01, PNorm = 69.5557, GNorm = 0.6390, lr_0 = 7.4538e-04
Loss = 1.6944e-01, PNorm = 69.6484, GNorm = 0.6156, lr_0 = 7.4913e-04
Loss = 1.4728e-01, PNorm = 69.7351, GNorm = 0.8210, lr_0 = 7.5288e-04
Loss = 1.6279e-01, PNorm = 69.8352, GNorm = 0.7658, lr_0 = 7.5663e-04
Loss = 1.6315e-01, PNorm = 69.9196, GNorm = 1.0035, lr_0 = 7.6038e-04
Loss = 1.6674e-01, PNorm = 70.0235, GNorm = 0.9168, lr_0 = 7.6413e-04
Loss = 1.5929e-01, PNorm = 70.1177, GNorm = 0.6827, lr_0 = 7.6788e-04
Loss = 1.6811e-01, PNorm = 70.2160, GNorm = 0.8421, lr_0 = 7.7163e-04
Loss = 1.7567e-01, PNorm = 70.3163, GNorm = 0.7016, lr_0 = 7.7538e-04
Loss = 1.7742e-01, PNorm = 70.4108, GNorm = 0.7556, lr_0 = 7.7913e-04
Loss = 1.7753e-01, PNorm = 70.5219, GNorm = 1.0449, lr_0 = 7.8288e-04
Loss = 1.8980e-01, PNorm = 70.6428, GNorm = 1.0237, lr_0 = 7.8663e-04
Loss = 1.6495e-01, PNorm = 70.7568, GNorm = 0.8487, lr_0 = 7.9038e-04
Loss = 1.6913e-01, PNorm = 70.8606, GNorm = 0.6936, lr_0 = 7.9413e-04
Loss = 1.9030e-01, PNorm = 70.9778, GNorm = 1.2373, lr_0 = 7.9788e-04
Loss = 1.8366e-01, PNorm = 71.0849, GNorm = 0.8940, lr_0 = 8.0163e-04
Loss = 1.6561e-01, PNorm = 71.2048, GNorm = 1.1000, lr_0 = 8.0538e-04
Loss = 1.5775e-01, PNorm = 71.3166, GNorm = 1.0507, lr_0 = 8.0913e-04
Loss = 1.6480e-01, PNorm = 71.4280, GNorm = 0.9262, lr_0 = 8.1288e-04
Loss = 1.6792e-01, PNorm = 71.5359, GNorm = 0.7611, lr_0 = 8.1663e-04
Loss = 1.7439e-01, PNorm = 71.6599, GNorm = 1.2438, lr_0 = 8.2038e-04
Loss = 1.8742e-01, PNorm = 71.7583, GNorm = 0.7859, lr_0 = 8.2413e-04
Loss = 1.6666e-01, PNorm = 71.8776, GNorm = 1.2126, lr_0 = 8.2788e-04
Loss = 1.5360e-01, PNorm = 71.9822, GNorm = 0.7834, lr_0 = 8.3163e-04
Loss = 1.6419e-01, PNorm = 72.0861, GNorm = 1.0954, lr_0 = 8.3538e-04
Loss = 1.4448e-01, PNorm = 72.1869, GNorm = 0.6338, lr_0 = 8.3913e-04
Loss = 1.6540e-01, PNorm = 72.2865, GNorm = 0.8802, lr_0 = 8.4288e-04
Loss = 1.8117e-01, PNorm = 72.3975, GNorm = 1.0911, lr_0 = 8.4663e-04
Loss = 1.5852e-01, PNorm = 72.5095, GNorm = 0.7146, lr_0 = 8.5038e-04
Loss = 1.6938e-01, PNorm = 72.6160, GNorm = 0.6033, lr_0 = 8.5413e-04
Loss = 1.9148e-01, PNorm = 72.7276, GNorm = 0.8253, lr_0 = 8.5788e-04
Loss = 1.9433e-01, PNorm = 72.8510, GNorm = 0.7199, lr_0 = 8.6163e-04
Loss = 1.5495e-01, PNorm = 72.9611, GNorm = 0.7197, lr_0 = 8.6538e-04
Loss = 1.4633e-01, PNorm = 73.0723, GNorm = 0.9649, lr_0 = 8.6913e-04
Loss = 2.0318e-01, PNorm = 73.1786, GNorm = 1.0382, lr_0 = 8.7288e-04
Loss = 1.5510e-01, PNorm = 73.2965, GNorm = 0.7891, lr_0 = 8.7663e-04
Loss = 1.6704e-01, PNorm = 73.4098, GNorm = 0.6029, lr_0 = 8.8038e-04
Loss = 1.8684e-01, PNorm = 73.5346, GNorm = 0.9097, lr_0 = 8.8413e-04
Loss = 1.8009e-01, PNorm = 73.6727, GNorm = 0.7185, lr_0 = 8.8788e-04
Loss = 1.6937e-01, PNorm = 73.8026, GNorm = 0.6958, lr_0 = 8.9163e-04
Loss = 1.7474e-01, PNorm = 73.9208, GNorm = 1.0831, lr_0 = 8.9538e-04
Loss = 1.7864e-01, PNorm = 74.0482, GNorm = 1.7823, lr_0 = 8.9913e-04
Loss = 1.6207e-01, PNorm = 74.1766, GNorm = 0.8596, lr_0 = 9.0288e-04
Loss = 1.7085e-01, PNorm = 74.2932, GNorm = 0.9713, lr_0 = 9.0663e-04
Loss = 1.6651e-01, PNorm = 74.4224, GNorm = 0.8004, lr_0 = 9.1038e-04
Loss = 1.9409e-01, PNorm = 74.5633, GNorm = 0.8746, lr_0 = 9.1413e-04
Loss = 1.9543e-01, PNorm = 74.7068, GNorm = 0.6957, lr_0 = 9.1788e-04
Loss = 1.8464e-01, PNorm = 74.8529, GNorm = 0.6963, lr_0 = 9.2163e-04
Loss = 1.8533e-01, PNorm = 74.9960, GNorm = 0.8023, lr_0 = 9.2538e-04
Loss = 1.7187e-01, PNorm = 75.1232, GNorm = 0.9562, lr_0 = 9.2913e-04
Loss = 1.6185e-01, PNorm = 75.2503, GNorm = 0.6675, lr_0 = 9.3288e-04
Loss = 1.7635e-01, PNorm = 75.3572, GNorm = 0.7490, lr_0 = 9.3663e-04
Loss = 1.7768e-01, PNorm = 75.4902, GNorm = 0.5545, lr_0 = 9.4038e-04
Loss = 1.7250e-01, PNorm = 75.6259, GNorm = 1.4413, lr_0 = 9.4413e-04
Loss = 1.8780e-01, PNorm = 75.7425, GNorm = 1.0488, lr_0 = 9.4788e-04
Loss = 1.8642e-01, PNorm = 75.8751, GNorm = 0.9843, lr_0 = 9.5163e-04
Loss = 1.4980e-01, PNorm = 76.0033, GNorm = 0.7423, lr_0 = 9.5538e-04
Loss = 1.9877e-01, PNorm = 76.1417, GNorm = 0.6765, lr_0 = 9.5913e-04
Loss = 1.7623e-01, PNorm = 76.2841, GNorm = 0.8828, lr_0 = 9.6288e-04
Loss = 1.8363e-01, PNorm = 76.4298, GNorm = 0.9169, lr_0 = 9.6663e-04
Loss = 1.6321e-01, PNorm = 76.5605, GNorm = 0.5403, lr_0 = 9.7038e-04
Loss = 1.8020e-01, PNorm = 76.6982, GNorm = 0.8489, lr_0 = 9.7413e-04
Loss = 1.9643e-01, PNorm = 76.8251, GNorm = 0.6448, lr_0 = 9.7788e-04
Loss = 1.8703e-01, PNorm = 76.9622, GNorm = 0.5776, lr_0 = 9.8163e-04
Loss = 1.5924e-01, PNorm = 77.0866, GNorm = 0.5474, lr_0 = 9.8537e-04
Loss = 1.7895e-01, PNorm = 77.2248, GNorm = 0.9129, lr_0 = 9.8912e-04
Loss = 2.0116e-01, PNorm = 77.3415, GNorm = 0.6148, lr_0 = 9.9288e-04
Loss = 1.9102e-01, PNorm = 77.4838, GNorm = 0.7503, lr_0 = 9.9663e-04
Loss = 1.9617e-01, PNorm = 77.6286, GNorm = 0.7401, lr_0 = 9.9993e-04
Validation mae = 0.539967
Epoch 2
Loss = 1.0278e-01, PNorm = 77.7684, GNorm = 0.5850, lr_0 = 9.9925e-04
Loss = 1.0422e-01, PNorm = 77.8852, GNorm = 0.6505, lr_0 = 9.9856e-04
Loss = 1.2219e-01, PNorm = 77.9887, GNorm = 0.6403, lr_0 = 9.9788e-04
Loss = 1.1634e-01, PNorm = 78.0907, GNorm = 1.1193, lr_0 = 9.9719e-04
Loss = 9.7952e-02, PNorm = 78.1774, GNorm = 0.4796, lr_0 = 9.9651e-04
Loss = 1.1090e-01, PNorm = 78.2736, GNorm = 1.0032, lr_0 = 9.9583e-04
Loss = 9.7891e-02, PNorm = 78.3648, GNorm = 0.5221, lr_0 = 9.9515e-04
Loss = 1.1192e-01, PNorm = 78.4513, GNorm = 0.5735, lr_0 = 9.9446e-04
Loss = 1.1311e-01, PNorm = 78.5353, GNorm = 0.7007, lr_0 = 9.9378e-04
Loss = 1.0335e-01, PNorm = 78.6367, GNorm = 0.5826, lr_0 = 9.9310e-04
Loss = 1.0111e-01, PNorm = 78.7293, GNorm = 0.5878, lr_0 = 9.9242e-04
Loss = 1.0731e-01, PNorm = 78.8173, GNorm = 0.5382, lr_0 = 9.9174e-04
Loss = 9.1834e-02, PNorm = 78.9135, GNorm = 0.7987, lr_0 = 9.9106e-04
Loss = 1.1031e-01, PNorm = 79.0088, GNorm = 0.7178, lr_0 = 9.9038e-04
Loss = 1.1868e-01, PNorm = 79.1168, GNorm = 0.9572, lr_0 = 9.8971e-04
Loss = 1.0713e-01, PNorm = 79.2258, GNorm = 0.7739, lr_0 = 9.8903e-04
Loss = 1.1644e-01, PNorm = 79.3245, GNorm = 0.8073, lr_0 = 9.8835e-04
Loss = 9.7715e-02, PNorm = 79.4262, GNorm = 0.6276, lr_0 = 9.8767e-04
Loss = 1.0605e-01, PNorm = 79.5278, GNorm = 0.7025, lr_0 = 9.8700e-04
Loss = 1.0938e-01, PNorm = 79.6266, GNorm = 0.7450, lr_0 = 9.8632e-04
Loss = 1.2044e-01, PNorm = 79.7233, GNorm = 0.6641, lr_0 = 9.8564e-04
Loss = 1.2355e-01, PNorm = 79.8429, GNorm = 1.1792, lr_0 = 9.8497e-04
Loss = 1.1259e-01, PNorm = 79.9562, GNorm = 0.7519, lr_0 = 9.8429e-04
Loss = 1.1297e-01, PNorm = 80.0703, GNorm = 1.1139, lr_0 = 9.8362e-04
Loss = 1.1410e-01, PNorm = 80.1763, GNorm = 0.8729, lr_0 = 9.8295e-04
Loss = 1.1045e-01, PNorm = 80.2809, GNorm = 0.5962, lr_0 = 9.8227e-04
Loss = 1.1504e-01, PNorm = 80.3719, GNorm = 0.5537, lr_0 = 9.8160e-04
Loss = 1.1238e-01, PNorm = 80.4800, GNorm = 0.7354, lr_0 = 9.8093e-04
Loss = 1.2740e-01, PNorm = 80.5854, GNorm = 0.7341, lr_0 = 9.8026e-04
Loss = 1.0321e-01, PNorm = 80.6874, GNorm = 0.5878, lr_0 = 9.7958e-04
Loss = 1.0045e-01, PNorm = 80.7951, GNorm = 0.6776, lr_0 = 9.7891e-04
Loss = 1.1037e-01, PNorm = 80.8918, GNorm = 0.7976, lr_0 = 9.7824e-04
Loss = 1.0831e-01, PNorm = 80.9985, GNorm = 0.6664, lr_0 = 9.7757e-04
Loss = 1.0855e-01, PNorm = 81.1042, GNorm = 0.5950, lr_0 = 9.7690e-04
Loss = 1.0883e-01, PNorm = 81.2047, GNorm = 0.8268, lr_0 = 9.7623e-04
Loss = 1.2497e-01, PNorm = 81.3120, GNorm = 0.5741, lr_0 = 9.7556e-04
Loss = 1.0347e-01, PNorm = 81.4209, GNorm = 0.5860, lr_0 = 9.7490e-04
Loss = 1.0808e-01, PNorm = 81.5205, GNorm = 0.5082, lr_0 = 9.7423e-04
Loss = 1.2578e-01, PNorm = 81.6336, GNorm = 0.6289, lr_0 = 9.7356e-04
Loss = 1.0268e-01, PNorm = 81.7509, GNorm = 0.5406, lr_0 = 9.7289e-04
Loss = 9.6995e-02, PNorm = 81.8560, GNorm = 0.6865, lr_0 = 9.7223e-04
Loss = 1.1526e-01, PNorm = 81.9519, GNorm = 0.8725, lr_0 = 9.7156e-04
Loss = 1.1008e-01, PNorm = 82.0556, GNorm = 0.4516, lr_0 = 9.7090e-04
Loss = 1.1458e-01, PNorm = 82.1611, GNorm = 1.1947, lr_0 = 9.7023e-04
Loss = 1.0476e-01, PNorm = 82.2575, GNorm = 0.6289, lr_0 = 9.6957e-04
Loss = 1.0460e-01, PNorm = 82.3633, GNorm = 0.4261, lr_0 = 9.6890e-04
Loss = 1.1558e-01, PNorm = 82.4679, GNorm = 0.4651, lr_0 = 9.6824e-04
Loss = 1.0501e-01, PNorm = 82.5698, GNorm = 0.7508, lr_0 = 9.6757e-04
Loss = 1.0661e-01, PNorm = 82.6780, GNorm = 0.8098, lr_0 = 9.6691e-04
Loss = 1.1984e-01, PNorm = 82.7814, GNorm = 1.1428, lr_0 = 9.6625e-04
Loss = 1.1579e-01, PNorm = 82.9028, GNorm = 0.6176, lr_0 = 9.6559e-04
Loss = 1.1236e-01, PNorm = 82.9984, GNorm = 0.5651, lr_0 = 9.6493e-04
Loss = 9.9111e-02, PNorm = 83.1054, GNorm = 0.7169, lr_0 = 9.6427e-04
Loss = 1.0379e-01, PNorm = 83.2032, GNorm = 0.5942, lr_0 = 9.6360e-04
Loss = 1.2688e-01, PNorm = 83.3202, GNorm = 0.5269, lr_0 = 9.6294e-04
Loss = 1.1490e-01, PNorm = 83.4308, GNorm = 0.5465, lr_0 = 9.6228e-04
Loss = 1.2539e-01, PNorm = 83.5458, GNorm = 0.6075, lr_0 = 9.6163e-04
Loss = 1.2083e-01, PNorm = 83.6565, GNorm = 1.9857, lr_0 = 9.6097e-04
Loss = 1.0061e-01, PNorm = 83.7656, GNorm = 0.6019, lr_0 = 9.6031e-04
Loss = 1.1150e-01, PNorm = 83.8639, GNorm = 0.4981, lr_0 = 9.5965e-04
Loss = 1.2813e-01, PNorm = 83.9685, GNorm = 0.5421, lr_0 = 9.5899e-04
Loss = 1.2030e-01, PNorm = 84.0747, GNorm = 0.6313, lr_0 = 9.5834e-04
Loss = 1.0596e-01, PNorm = 84.1815, GNorm = 0.9525, lr_0 = 9.5768e-04
Loss = 1.1660e-01, PNorm = 84.2877, GNorm = 0.8024, lr_0 = 9.5702e-04
Loss = 1.1761e-01, PNorm = 84.4059, GNorm = 0.5961, lr_0 = 9.5637e-04
Loss = 1.1299e-01, PNorm = 84.5077, GNorm = 0.8944, lr_0 = 9.5571e-04
Loss = 1.2498e-01, PNorm = 84.6147, GNorm = 0.6422, lr_0 = 9.5506e-04
Loss = 1.1416e-01, PNorm = 84.7280, GNorm = 0.5881, lr_0 = 9.5440e-04
Loss = 1.0021e-01, PNorm = 84.8357, GNorm = 0.7829, lr_0 = 9.5375e-04
Loss = 1.0916e-01, PNorm = 84.9495, GNorm = 0.5606, lr_0 = 9.5310e-04
Loss = 1.0896e-01, PNorm = 85.0448, GNorm = 0.6283, lr_0 = 9.5244e-04
Loss = 1.2469e-01, PNorm = 85.1626, GNorm = 0.7745, lr_0 = 9.5179e-04
Loss = 1.1934e-01, PNorm = 85.2546, GNorm = 0.5232, lr_0 = 9.5114e-04
Loss = 1.0287e-01, PNorm = 85.3714, GNorm = 0.5046, lr_0 = 9.5049e-04
Loss = 1.2481e-01, PNorm = 85.4700, GNorm = 0.8965, lr_0 = 9.4984e-04
Loss = 1.0518e-01, PNorm = 85.5730, GNorm = 0.7923, lr_0 = 9.4919e-04
Loss = 1.1012e-01, PNorm = 85.6754, GNorm = 0.7432, lr_0 = 9.4854e-04
Loss = 1.1556e-01, PNorm = 85.7911, GNorm = 1.2872, lr_0 = 9.4789e-04
Loss = 1.0338e-01, PNorm = 85.8892, GNorm = 0.5748, lr_0 = 9.4724e-04
Loss = 1.1497e-01, PNorm = 85.9953, GNorm = 0.7771, lr_0 = 9.4659e-04
Loss = 1.1081e-01, PNorm = 86.1008, GNorm = 0.5428, lr_0 = 9.4594e-04
Loss = 1.0941e-01, PNorm = 86.1939, GNorm = 0.5322, lr_0 = 9.4529e-04
Loss = 1.1142e-01, PNorm = 86.2973, GNorm = 0.4917, lr_0 = 9.4464e-04
Loss = 1.2011e-01, PNorm = 86.3964, GNorm = 0.9384, lr_0 = 9.4400e-04
Loss = 1.2061e-01, PNorm = 86.5129, GNorm = 0.7104, lr_0 = 9.4335e-04
Loss = 1.1240e-01, PNorm = 86.6154, GNorm = 0.4202, lr_0 = 9.4270e-04
Loss = 1.2413e-01, PNorm = 86.7259, GNorm = 0.9447, lr_0 = 9.4206e-04
Loss = 1.2160e-01, PNorm = 86.8356, GNorm = 0.4762, lr_0 = 9.4141e-04
Loss = 1.0496e-01, PNorm = 86.9492, GNorm = 0.6201, lr_0 = 9.4077e-04
Loss = 1.2193e-01, PNorm = 87.0608, GNorm = 0.7649, lr_0 = 9.4012e-04
Loss = 1.1653e-01, PNorm = 87.1740, GNorm = 1.3240, lr_0 = 9.3948e-04
Loss = 1.3539e-01, PNorm = 87.2803, GNorm = 1.0912, lr_0 = 9.3884e-04
Loss = 1.1603e-01, PNorm = 87.4003, GNorm = 0.5889, lr_0 = 9.3819e-04
Loss = 1.2043e-01, PNorm = 87.5304, GNorm = 0.5966, lr_0 = 9.3755e-04
Loss = 1.0212e-01, PNorm = 87.6335, GNorm = 0.6595, lr_0 = 9.3691e-04
Loss = 1.2624e-01, PNorm = 87.7469, GNorm = 1.0227, lr_0 = 9.3627e-04
Loss = 1.2899e-01, PNorm = 87.8623, GNorm = 0.6789, lr_0 = 9.3562e-04
Loss = 1.1193e-01, PNorm = 87.9820, GNorm = 0.8285, lr_0 = 9.3498e-04
Loss = 1.1261e-01, PNorm = 88.0918, GNorm = 0.5407, lr_0 = 9.3434e-04
Loss = 1.2507e-01, PNorm = 88.1952, GNorm = 0.7079, lr_0 = 9.3370e-04
Loss = 1.1749e-01, PNorm = 88.3157, GNorm = 0.5208, lr_0 = 9.3306e-04
Loss = 1.1291e-01, PNorm = 88.4109, GNorm = 0.4958, lr_0 = 9.3242e-04
Loss = 1.2126e-01, PNorm = 88.5139, GNorm = 1.3400, lr_0 = 9.3178e-04
Loss = 1.1856e-01, PNorm = 88.6124, GNorm = 0.7815, lr_0 = 9.3115e-04
Loss = 1.1574e-01, PNorm = 88.7155, GNorm = 0.7510, lr_0 = 9.3051e-04
Loss = 1.1969e-01, PNorm = 88.8288, GNorm = 0.6284, lr_0 = 9.2987e-04
Loss = 1.2425e-01, PNorm = 88.9343, GNorm = 0.5359, lr_0 = 9.2923e-04
Loss = 1.1716e-01, PNorm = 89.0427, GNorm = 0.4947, lr_0 = 9.2860e-04
Loss = 1.0537e-01, PNorm = 89.1467, GNorm = 0.8165, lr_0 = 9.2796e-04
Loss = 1.0444e-01, PNorm = 89.2456, GNorm = 0.6856, lr_0 = 9.2733e-04
Loss = 1.2078e-01, PNorm = 89.3508, GNorm = 1.0155, lr_0 = 9.2669e-04
Loss = 1.1949e-01, PNorm = 89.4662, GNorm = 0.3763, lr_0 = 9.2606e-04
Loss = 1.2175e-01, PNorm = 89.5808, GNorm = 0.8668, lr_0 = 9.2542e-04
Loss = 1.1179e-01, PNorm = 89.6845, GNorm = 0.8215, lr_0 = 9.2479e-04
Loss = 1.2662e-01, PNorm = 89.7985, GNorm = 0.6568, lr_0 = 9.2415e-04
Loss = 1.2967e-01, PNorm = 89.9129, GNorm = 0.6783, lr_0 = 9.2352e-04
Loss = 1.3354e-01, PNorm = 90.0314, GNorm = 1.0204, lr_0 = 9.2289e-04
Loss = 1.2889e-01, PNorm = 90.1561, GNorm = 1.1749, lr_0 = 9.2226e-04
Loss = 1.0773e-01, PNorm = 90.2837, GNorm = 0.7762, lr_0 = 9.2162e-04
Loss = 1.1793e-01, PNorm = 90.3792, GNorm = 0.7157, lr_0 = 9.2099e-04
Validation mae = 0.510109
Epoch 3
Loss = 8.5680e-02, PNorm = 90.4811, GNorm = 1.0222, lr_0 = 9.2036e-04
Loss = 7.5147e-02, PNorm = 90.5726, GNorm = 0.6541, lr_0 = 9.1973e-04
Loss = 7.1589e-02, PNorm = 90.6456, GNorm = 0.6466, lr_0 = 9.1910e-04
Loss = 7.8162e-02, PNorm = 90.7259, GNorm = 0.7625, lr_0 = 9.1847e-04
Loss = 6.2115e-02, PNorm = 90.7894, GNorm = 0.4797, lr_0 = 9.1784e-04
Loss = 6.4579e-02, PNorm = 90.8517, GNorm = 0.4935, lr_0 = 9.1721e-04
Loss = 7.8518e-02, PNorm = 90.9063, GNorm = 0.4069, lr_0 = 9.1658e-04
Loss = 6.5035e-02, PNorm = 90.9784, GNorm = 0.4734, lr_0 = 9.1596e-04
Loss = 6.1419e-02, PNorm = 91.0445, GNorm = 0.8194, lr_0 = 9.1533e-04
Loss = 6.2642e-02, PNorm = 91.1046, GNorm = 0.3702, lr_0 = 9.1470e-04
Loss = 6.5920e-02, PNorm = 91.1597, GNorm = 0.7277, lr_0 = 9.1408e-04
Loss = 7.3239e-02, PNorm = 91.2360, GNorm = 0.8567, lr_0 = 9.1345e-04
Loss = 6.6365e-02, PNorm = 91.2990, GNorm = 0.6523, lr_0 = 9.1282e-04
Loss = 6.8655e-02, PNorm = 91.3622, GNorm = 0.4177, lr_0 = 9.1220e-04
Loss = 6.5273e-02, PNorm = 91.4346, GNorm = 0.4434, lr_0 = 9.1157e-04
Loss = 6.1750e-02, PNorm = 91.5001, GNorm = 0.4203, lr_0 = 9.1095e-04
Loss = 6.4051e-02, PNorm = 91.5628, GNorm = 0.7238, lr_0 = 9.1032e-04
Loss = 6.2455e-02, PNorm = 91.6200, GNorm = 0.4151, lr_0 = 9.0970e-04
Loss = 6.1018e-02, PNorm = 91.6805, GNorm = 0.2881, lr_0 = 9.0908e-04
Loss = 7.3285e-02, PNorm = 91.7524, GNorm = 0.4943, lr_0 = 9.0846e-04
Loss = 6.1965e-02, PNorm = 91.8148, GNorm = 0.5233, lr_0 = 9.0783e-04
Loss = 6.3372e-02, PNorm = 91.8880, GNorm = 0.3238, lr_0 = 9.0721e-04
Loss = 6.8028e-02, PNorm = 91.9596, GNorm = 0.5859, lr_0 = 9.0659e-04
Loss = 6.2688e-02, PNorm = 92.0267, GNorm = 0.4110, lr_0 = 9.0597e-04
Loss = 6.4057e-02, PNorm = 92.0901, GNorm = 0.8387, lr_0 = 9.0535e-04
Loss = 6.4291e-02, PNorm = 92.1647, GNorm = 0.4930, lr_0 = 9.0473e-04
Loss = 7.1328e-02, PNorm = 92.2302, GNorm = 0.3729, lr_0 = 9.0411e-04
Loss = 8.4233e-02, PNorm = 92.3046, GNorm = 0.4599, lr_0 = 9.0349e-04
Loss = 6.8849e-02, PNorm = 92.3803, GNorm = 0.5374, lr_0 = 9.0287e-04
Loss = 7.5709e-02, PNorm = 92.4636, GNorm = 0.6479, lr_0 = 9.0225e-04
Loss = 7.4327e-02, PNorm = 92.5363, GNorm = 0.5978, lr_0 = 9.0163e-04
Loss = 6.7470e-02, PNorm = 92.6245, GNorm = 0.2817, lr_0 = 9.0102e-04
Loss = 6.8287e-02, PNorm = 92.7029, GNorm = 0.8289, lr_0 = 9.0040e-04
Loss = 6.6863e-02, PNorm = 92.7872, GNorm = 0.7347, lr_0 = 8.9978e-04
Loss = 7.6998e-02, PNorm = 92.8726, GNorm = 0.4786, lr_0 = 8.9916e-04
Loss = 7.2201e-02, PNorm = 92.9417, GNorm = 0.4541, lr_0 = 8.9855e-04
Loss = 6.6833e-02, PNorm = 93.0191, GNorm = 0.5591, lr_0 = 8.9793e-04
Loss = 7.2358e-02, PNorm = 93.0878, GNorm = 0.5496, lr_0 = 8.9732e-04
Loss = 6.9828e-02, PNorm = 93.1671, GNorm = 0.5314, lr_0 = 8.9670e-04
Loss = 7.5725e-02, PNorm = 93.2458, GNorm = 0.3962, lr_0 = 8.9609e-04
Loss = 6.8393e-02, PNorm = 93.3213, GNorm = 0.3283, lr_0 = 8.9548e-04
Loss = 6.3364e-02, PNorm = 93.3893, GNorm = 0.6676, lr_0 = 8.9486e-04
Loss = 7.1183e-02, PNorm = 93.4575, GNorm = 0.6845, lr_0 = 8.9425e-04
Loss = 6.7620e-02, PNorm = 93.5311, GNorm = 0.5117, lr_0 = 8.9364e-04
Loss = 6.3083e-02, PNorm = 93.6031, GNorm = 0.4384, lr_0 = 8.9302e-04
Loss = 6.3271e-02, PNorm = 93.6816, GNorm = 0.6050, lr_0 = 8.9241e-04
Loss = 8.4739e-02, PNorm = 93.7594, GNorm = 0.5123, lr_0 = 8.9180e-04
Loss = 6.7205e-02, PNorm = 93.8359, GNorm = 0.3965, lr_0 = 8.9119e-04
Loss = 6.9680e-02, PNorm = 93.9176, GNorm = 0.5767, lr_0 = 8.9058e-04
Loss = 7.2100e-02, PNorm = 93.9959, GNorm = 0.4249, lr_0 = 8.8997e-04
Loss = 5.9668e-02, PNorm = 94.0719, GNorm = 0.4598, lr_0 = 8.8936e-04
Loss = 6.2532e-02, PNorm = 94.1479, GNorm = 0.7003, lr_0 = 8.8875e-04
Loss = 7.1379e-02, PNorm = 94.2169, GNorm = 0.5598, lr_0 = 8.8814e-04
Loss = 7.6153e-02, PNorm = 94.2771, GNorm = 0.5304, lr_0 = 8.8753e-04
Loss = 7.7810e-02, PNorm = 94.3481, GNorm = 0.4400, lr_0 = 8.8693e-04
Loss = 6.9261e-02, PNorm = 94.4297, GNorm = 0.5168, lr_0 = 8.8632e-04
Loss = 7.0213e-02, PNorm = 94.5092, GNorm = 0.4282, lr_0 = 8.8571e-04
Loss = 6.8572e-02, PNorm = 94.5830, GNorm = 0.6189, lr_0 = 8.8510e-04
Loss = 6.8635e-02, PNorm = 94.6692, GNorm = 0.9620, lr_0 = 8.8450e-04
Loss = 8.0124e-02, PNorm = 94.7454, GNorm = 0.9795, lr_0 = 8.8389e-04
Loss = 8.3221e-02, PNorm = 94.8416, GNorm = 0.4986, lr_0 = 8.8329e-04
Loss = 6.8549e-02, PNorm = 94.9195, GNorm = 0.8902, lr_0 = 8.8268e-04
Loss = 7.2608e-02, PNorm = 95.0017, GNorm = 0.8230, lr_0 = 8.8208e-04
Loss = 7.3694e-02, PNorm = 95.0814, GNorm = 0.3341, lr_0 = 8.8147e-04
Loss = 7.2156e-02, PNorm = 95.1674, GNorm = 0.4440, lr_0 = 8.8087e-04
Loss = 6.7748e-02, PNorm = 95.2533, GNorm = 0.4596, lr_0 = 8.8026e-04
Loss = 7.4120e-02, PNorm = 95.3277, GNorm = 0.6969, lr_0 = 8.7966e-04
Loss = 7.0950e-02, PNorm = 95.4158, GNorm = 0.7161, lr_0 = 8.7906e-04
Loss = 7.3874e-02, PNorm = 95.5021, GNorm = 0.5574, lr_0 = 8.7846e-04
Loss = 7.5075e-02, PNorm = 95.5861, GNorm = 0.3505, lr_0 = 8.7785e-04
Loss = 7.0967e-02, PNorm = 95.6739, GNorm = 0.5135, lr_0 = 8.7725e-04
Loss = 7.2895e-02, PNorm = 95.7608, GNorm = 0.7089, lr_0 = 8.7665e-04
Loss = 7.0806e-02, PNorm = 95.8420, GNorm = 0.3995, lr_0 = 8.7605e-04
Loss = 7.2994e-02, PNorm = 95.9250, GNorm = 0.5033, lr_0 = 8.7545e-04
Loss = 5.7252e-02, PNorm = 95.9948, GNorm = 0.3802, lr_0 = 8.7485e-04
Loss = 7.0179e-02, PNorm = 96.0727, GNorm = 0.4472, lr_0 = 8.7425e-04
Loss = 8.6237e-02, PNorm = 96.1436, GNorm = 0.4972, lr_0 = 8.7365e-04
Loss = 8.4250e-02, PNorm = 96.2276, GNorm = 0.6413, lr_0 = 8.7306e-04
Loss = 7.1653e-02, PNorm = 96.3134, GNorm = 0.5377, lr_0 = 8.7246e-04
Loss = 6.8239e-02, PNorm = 96.4002, GNorm = 0.5832, lr_0 = 8.7186e-04
Loss = 8.9117e-02, PNorm = 96.4906, GNorm = 0.5072, lr_0 = 8.7126e-04
Loss = 7.5897e-02, PNorm = 96.5804, GNorm = 0.7091, lr_0 = 8.7067e-04
Loss = 7.3899e-02, PNorm = 96.6772, GNorm = 0.5322, lr_0 = 8.7007e-04
Loss = 6.8264e-02, PNorm = 96.7687, GNorm = 0.6569, lr_0 = 8.6947e-04
Loss = 6.9073e-02, PNorm = 96.8507, GNorm = 0.5589, lr_0 = 8.6888e-04
Loss = 7.1239e-02, PNorm = 96.9322, GNorm = 0.3303, lr_0 = 8.6828e-04
Loss = 6.4226e-02, PNorm = 97.0088, GNorm = 0.7368, lr_0 = 8.6769e-04
Loss = 6.0664e-02, PNorm = 97.0932, GNorm = 0.2512, lr_0 = 8.6709e-04
Loss = 6.5216e-02, PNorm = 97.1626, GNorm = 0.4476, lr_0 = 8.6650e-04
Loss = 6.6779e-02, PNorm = 97.2376, GNorm = 0.4480, lr_0 = 8.6590e-04
Loss = 7.2073e-02, PNorm = 97.3115, GNorm = 0.5133, lr_0 = 8.6531e-04
Loss = 6.4926e-02, PNorm = 97.3970, GNorm = 0.6487, lr_0 = 8.6472e-04
Loss = 7.6947e-02, PNorm = 97.4731, GNorm = 0.5281, lr_0 = 8.6413e-04
Loss = 7.4768e-02, PNorm = 97.5588, GNorm = 0.5749, lr_0 = 8.6353e-04
Loss = 7.7010e-02, PNorm = 97.6466, GNorm = 0.7470, lr_0 = 8.6294e-04
Loss = 8.1286e-02, PNorm = 97.7328, GNorm = 0.9111, lr_0 = 8.6235e-04
Loss = 7.8907e-02, PNorm = 97.8314, GNorm = 0.3175, lr_0 = 8.6176e-04
Loss = 6.9616e-02, PNorm = 97.9178, GNorm = 0.5074, lr_0 = 8.6117e-04
Loss = 7.4735e-02, PNorm = 98.0063, GNorm = 0.4186, lr_0 = 8.6058e-04
Loss = 7.9310e-02, PNorm = 98.0916, GNorm = 0.4670, lr_0 = 8.5999e-04
Loss = 8.1811e-02, PNorm = 98.1874, GNorm = 0.6832, lr_0 = 8.5940e-04
Loss = 8.8091e-02, PNorm = 98.2896, GNorm = 1.5657, lr_0 = 8.5881e-04
Loss = 7.7843e-02, PNorm = 98.3877, GNorm = 0.4602, lr_0 = 8.5823e-04
Loss = 8.2913e-02, PNorm = 98.4844, GNorm = 0.6330, lr_0 = 8.5764e-04
Loss = 8.5365e-02, PNorm = 98.5895, GNorm = 1.2182, lr_0 = 8.5705e-04
Loss = 7.2192e-02, PNorm = 98.6800, GNorm = 0.4343, lr_0 = 8.5646e-04
Loss = 7.2661e-02, PNorm = 98.7687, GNorm = 0.5532, lr_0 = 8.5588e-04
Loss = 6.1873e-02, PNorm = 98.8477, GNorm = 0.4044, lr_0 = 8.5529e-04
Loss = 8.3420e-02, PNorm = 98.9445, GNorm = 1.4385, lr_0 = 8.5470e-04
Loss = 7.5049e-02, PNorm = 99.0379, GNorm = 0.6626, lr_0 = 8.5412e-04
Loss = 8.3421e-02, PNorm = 99.1418, GNorm = 1.1323, lr_0 = 8.5353e-04
Loss = 7.1173e-02, PNorm = 99.2336, GNorm = 0.5015, lr_0 = 8.5295e-04
Loss = 7.5274e-02, PNorm = 99.3158, GNorm = 0.6696, lr_0 = 8.5236e-04
Loss = 7.9769e-02, PNorm = 99.4026, GNorm = 0.6093, lr_0 = 8.5178e-04
Loss = 8.1082e-02, PNorm = 99.4822, GNorm = 0.8694, lr_0 = 8.5120e-04
Loss = 7.4695e-02, PNorm = 99.5838, GNorm = 0.7366, lr_0 = 8.5061e-04
Loss = 8.5651e-02, PNorm = 99.6820, GNorm = 0.4110, lr_0 = 8.5003e-04
Loss = 8.2312e-02, PNorm = 99.7779, GNorm = 0.3529, lr_0 = 8.4945e-04
Loss = 7.4212e-02, PNorm = 99.8765, GNorm = 0.8793, lr_0 = 8.4887e-04
Loss = 7.6002e-02, PNorm = 99.9756, GNorm = 0.5923, lr_0 = 8.4828e-04
Validation mae = 0.505495
Epoch 4
Loss = 4.8474e-02, PNorm = 100.0607, GNorm = 0.4813, lr_0 = 8.4770e-04
Loss = 5.3711e-02, PNorm = 100.1380, GNorm = 0.7161, lr_0 = 8.4712e-04
Loss = 4.4439e-02, PNorm = 100.1905, GNorm = 0.4157, lr_0 = 8.4654e-04
Loss = 5.2812e-02, PNorm = 100.2493, GNorm = 0.4046, lr_0 = 8.4596e-04
Loss = 5.7437e-02, PNorm = 100.3065, GNorm = 0.7006, lr_0 = 8.4538e-04
Loss = 4.7534e-02, PNorm = 100.3683, GNorm = 0.5681, lr_0 = 8.4480e-04
Loss = 4.8258e-02, PNorm = 100.4250, GNorm = 0.3449, lr_0 = 8.4423e-04
Loss = 4.8854e-02, PNorm = 100.4783, GNorm = 0.3508, lr_0 = 8.4365e-04
Loss = 4.2551e-02, PNorm = 100.5275, GNorm = 0.6106, lr_0 = 8.4307e-04
Loss = 4.8691e-02, PNorm = 100.5847, GNorm = 0.3616, lr_0 = 8.4249e-04
Loss = 4.4532e-02, PNorm = 100.6323, GNorm = 0.4778, lr_0 = 8.4191e-04
Loss = 4.9208e-02, PNorm = 100.6982, GNorm = 0.7323, lr_0 = 8.4134e-04
Loss = 4.6809e-02, PNorm = 100.7502, GNorm = 0.6588, lr_0 = 8.4076e-04
Loss = 4.6007e-02, PNorm = 100.8115, GNorm = 0.3192, lr_0 = 8.4019e-04
Loss = 4.9469e-02, PNorm = 100.8639, GNorm = 0.5309, lr_0 = 8.3961e-04
Loss = 4.8500e-02, PNorm = 100.9231, GNorm = 0.4592, lr_0 = 8.3903e-04
Loss = 4.8689e-02, PNorm = 100.9804, GNorm = 0.3290, lr_0 = 8.3846e-04
Loss = 5.0332e-02, PNorm = 101.0364, GNorm = 0.2682, lr_0 = 8.3789e-04
Loss = 5.0082e-02, PNorm = 101.0948, GNorm = 0.8531, lr_0 = 8.3731e-04
Loss = 4.1683e-02, PNorm = 101.1506, GNorm = 0.4910, lr_0 = 8.3674e-04
Loss = 4.5538e-02, PNorm = 101.2053, GNorm = 0.4479, lr_0 = 8.3616e-04
Loss = 3.7639e-02, PNorm = 101.2585, GNorm = 0.4626, lr_0 = 8.3559e-04
Loss = 4.7287e-02, PNorm = 101.3176, GNorm = 0.4033, lr_0 = 8.3502e-04
Loss = 4.2936e-02, PNorm = 101.3817, GNorm = 0.3561, lr_0 = 8.3445e-04
Loss = 4.9583e-02, PNorm = 101.4308, GNorm = 0.4320, lr_0 = 8.3388e-04
Loss = 4.2322e-02, PNorm = 101.4859, GNorm = 0.2958, lr_0 = 8.3330e-04
Loss = 3.8356e-02, PNorm = 101.5367, GNorm = 0.2817, lr_0 = 8.3273e-04
Loss = 4.6215e-02, PNorm = 101.5931, GNorm = 0.5641, lr_0 = 8.3216e-04
Loss = 4.8694e-02, PNorm = 101.6432, GNorm = 0.2953, lr_0 = 8.3159e-04
Loss = 4.7739e-02, PNorm = 101.7075, GNorm = 0.6526, lr_0 = 8.3102e-04
Loss = 4.2268e-02, PNorm = 101.7632, GNorm = 0.2458, lr_0 = 8.3045e-04
Loss = 4.6320e-02, PNorm = 101.8261, GNorm = 0.3400, lr_0 = 8.2988e-04
Loss = 5.0541e-02, PNorm = 101.8872, GNorm = 0.5584, lr_0 = 8.2932e-04
Loss = 4.6098e-02, PNorm = 101.9572, GNorm = 0.5799, lr_0 = 8.2875e-04
Loss = 5.0437e-02, PNorm = 102.0152, GNorm = 0.6657, lr_0 = 8.2818e-04
Loss = 4.5179e-02, PNorm = 102.0895, GNorm = 0.6557, lr_0 = 8.2761e-04
Loss = 4.0957e-02, PNorm = 102.1423, GNorm = 0.7570, lr_0 = 8.2705e-04
Loss = 5.5987e-02, PNorm = 102.2110, GNorm = 1.1504, lr_0 = 8.2648e-04
Loss = 4.4824e-02, PNorm = 102.2686, GNorm = 0.5461, lr_0 = 8.2591e-04
Loss = 4.1680e-02, PNorm = 102.3294, GNorm = 0.6072, lr_0 = 8.2535e-04
Loss = 4.8160e-02, PNorm = 102.3926, GNorm = 0.4317, lr_0 = 8.2478e-04
Loss = 4.0854e-02, PNorm = 102.4480, GNorm = 0.4142, lr_0 = 8.2422e-04
Loss = 4.3507e-02, PNorm = 102.5139, GNorm = 0.4694, lr_0 = 8.2365e-04
Loss = 5.1895e-02, PNorm = 102.5820, GNorm = 0.5860, lr_0 = 8.2309e-04
Loss = 4.3573e-02, PNorm = 102.6415, GNorm = 0.5898, lr_0 = 8.2252e-04
Loss = 4.5078e-02, PNorm = 102.7005, GNorm = 0.2844, lr_0 = 8.2196e-04
Loss = 5.3086e-02, PNorm = 102.7551, GNorm = 0.6834, lr_0 = 8.2140e-04
Loss = 4.1375e-02, PNorm = 102.8207, GNorm = 0.4179, lr_0 = 8.2084e-04
Loss = 4.9037e-02, PNorm = 102.8886, GNorm = 0.3040, lr_0 = 8.2027e-04
Loss = 4.4812e-02, PNorm = 102.9409, GNorm = 0.2795, lr_0 = 8.1971e-04
Loss = 5.1895e-02, PNorm = 103.0053, GNorm = 0.3772, lr_0 = 8.1915e-04
Loss = 5.2985e-02, PNorm = 103.0677, GNorm = 0.5794, lr_0 = 8.1859e-04
Loss = 4.8549e-02, PNorm = 103.1363, GNorm = 0.3902, lr_0 = 8.1803e-04
Loss = 4.7659e-02, PNorm = 103.1958, GNorm = 0.2662, lr_0 = 8.1747e-04
Loss = 4.1713e-02, PNorm = 103.2593, GNorm = 0.3647, lr_0 = 8.1691e-04
Loss = 4.2477e-02, PNorm = 103.3224, GNorm = 0.2409, lr_0 = 8.1635e-04
Loss = 4.3970e-02, PNorm = 103.3861, GNorm = 0.3526, lr_0 = 8.1579e-04
Loss = 5.0131e-02, PNorm = 103.4477, GNorm = 0.4038, lr_0 = 8.1523e-04
Loss = 4.4510e-02, PNorm = 103.5055, GNorm = 0.5772, lr_0 = 8.1467e-04
Loss = 4.7456e-02, PNorm = 103.5709, GNorm = 0.3609, lr_0 = 8.1411e-04
Loss = 4.4233e-02, PNorm = 103.6397, GNorm = 0.3992, lr_0 = 8.1355e-04
Loss = 4.7064e-02, PNorm = 103.7065, GNorm = 0.3297, lr_0 = 8.1300e-04
Loss = 4.7508e-02, PNorm = 103.7713, GNorm = 0.3621, lr_0 = 8.1244e-04
Loss = 5.4790e-02, PNorm = 103.8366, GNorm = 0.4755, lr_0 = 8.1188e-04
Loss = 5.0770e-02, PNorm = 103.9154, GNorm = 0.4998, lr_0 = 8.1133e-04
Loss = 5.2445e-02, PNorm = 103.9850, GNorm = 0.6062, lr_0 = 8.1077e-04
Loss = 4.7041e-02, PNorm = 104.0502, GNorm = 0.3841, lr_0 = 8.1022e-04
Loss = 4.3736e-02, PNorm = 104.1154, GNorm = 0.7086, lr_0 = 8.0966e-04
Loss = 5.1020e-02, PNorm = 104.1844, GNorm = 0.4496, lr_0 = 8.0911e-04
Loss = 6.0956e-02, PNorm = 104.2546, GNorm = 0.6766, lr_0 = 8.0855e-04
Loss = 5.3244e-02, PNorm = 104.3448, GNorm = 0.6511, lr_0 = 8.0800e-04
Loss = 5.0153e-02, PNorm = 104.4319, GNorm = 0.5715, lr_0 = 8.0745e-04
Loss = 4.1982e-02, PNorm = 104.5095, GNorm = 0.3760, lr_0 = 8.0689e-04
Loss = 5.2636e-02, PNorm = 104.5745, GNorm = 0.3684, lr_0 = 8.0634e-04
Loss = 4.3261e-02, PNorm = 104.6336, GNorm = 0.6970, lr_0 = 8.0579e-04
Loss = 5.0946e-02, PNorm = 104.6992, GNorm = 0.3056, lr_0 = 8.0523e-04
Loss = 4.9458e-02, PNorm = 104.7691, GNorm = 0.5718, lr_0 = 8.0468e-04
Loss = 5.5557e-02, PNorm = 104.8476, GNorm = 0.3345, lr_0 = 8.0413e-04
Loss = 4.8358e-02, PNorm = 104.9197, GNorm = 0.3145, lr_0 = 8.0358e-04
Loss = 5.1010e-02, PNorm = 104.9917, GNorm = 0.3276, lr_0 = 8.0303e-04
Loss = 5.3406e-02, PNorm = 105.0686, GNorm = 0.3681, lr_0 = 8.0248e-04
Loss = 4.5957e-02, PNorm = 105.1388, GNorm = 0.2798, lr_0 = 8.0193e-04
Loss = 5.0572e-02, PNorm = 105.2068, GNorm = 0.7339, lr_0 = 8.0138e-04
Loss = 4.7717e-02, PNorm = 105.2758, GNorm = 0.2600, lr_0 = 8.0083e-04
Loss = 4.7767e-02, PNorm = 105.3549, GNorm = 0.8899, lr_0 = 8.0028e-04
Loss = 5.2324e-02, PNorm = 105.4254, GNorm = 0.6627, lr_0 = 7.9974e-04
Loss = 4.6426e-02, PNorm = 105.5027, GNorm = 0.3250, lr_0 = 7.9919e-04
Loss = 5.5957e-02, PNorm = 105.5776, GNorm = 0.9056, lr_0 = 7.9864e-04
Loss = 5.5547e-02, PNorm = 105.6610, GNorm = 0.4179, lr_0 = 7.9809e-04
Loss = 5.0291e-02, PNorm = 105.7384, GNorm = 0.4117, lr_0 = 7.9755e-04
Loss = 6.1504e-02, PNorm = 105.8193, GNorm = 0.4899, lr_0 = 7.9700e-04
Loss = 4.4207e-02, PNorm = 105.8969, GNorm = 0.5847, lr_0 = 7.9645e-04
Loss = 5.0296e-02, PNorm = 105.9634, GNorm = 0.2995, lr_0 = 7.9591e-04
Loss = 5.2414e-02, PNorm = 106.0333, GNorm = 0.5272, lr_0 = 7.9536e-04
Loss = 5.3372e-02, PNorm = 106.1037, GNorm = 0.5042, lr_0 = 7.9482e-04
Loss = 5.3064e-02, PNorm = 106.1745, GNorm = 0.7865, lr_0 = 7.9427e-04
Loss = 6.0782e-02, PNorm = 106.2547, GNorm = 0.8407, lr_0 = 7.9373e-04
Loss = 5.5135e-02, PNorm = 106.3361, GNorm = 0.5616, lr_0 = 7.9319e-04
Loss = 5.1111e-02, PNorm = 106.4214, GNorm = 0.6975, lr_0 = 7.9264e-04
Loss = 5.4004e-02, PNorm = 106.4977, GNorm = 0.4932, lr_0 = 7.9210e-04
Loss = 5.0124e-02, PNorm = 106.5766, GNorm = 0.4859, lr_0 = 7.9156e-04
Loss = 5.5963e-02, PNorm = 106.6643, GNorm = 0.6670, lr_0 = 7.9101e-04
Loss = 5.6461e-02, PNorm = 106.7527, GNorm = 0.7673, lr_0 = 7.9047e-04
Loss = 5.5870e-02, PNorm = 106.8469, GNorm = 0.3079, lr_0 = 7.8993e-04
Loss = 5.3043e-02, PNorm = 106.9332, GNorm = 0.3678, lr_0 = 7.8939e-04
Loss = 5.5006e-02, PNorm = 107.0089, GNorm = 0.2754, lr_0 = 7.8885e-04
Loss = 5.4623e-02, PNorm = 107.0846, GNorm = 0.6094, lr_0 = 7.8831e-04
Loss = 6.3351e-02, PNorm = 107.1688, GNorm = 0.3840, lr_0 = 7.8777e-04
Loss = 5.1664e-02, PNorm = 107.2496, GNorm = 0.5980, lr_0 = 7.8723e-04
Loss = 5.4081e-02, PNorm = 107.3353, GNorm = 0.4066, lr_0 = 7.8669e-04
Loss = 5.5577e-02, PNorm = 107.4183, GNorm = 0.5677, lr_0 = 7.8615e-04
Loss = 6.2235e-02, PNorm = 107.4913, GNorm = 0.4433, lr_0 = 7.8561e-04
Loss = 5.3160e-02, PNorm = 107.5807, GNorm = 0.4154, lr_0 = 7.8507e-04
Loss = 5.9109e-02, PNorm = 107.6563, GNorm = 0.5379, lr_0 = 7.8454e-04
Loss = 6.2919e-02, PNorm = 107.7460, GNorm = 0.4059, lr_0 = 7.8400e-04
Loss = 6.3509e-02, PNorm = 107.8314, GNorm = 0.4154, lr_0 = 7.8346e-04
Loss = 5.8241e-02, PNorm = 107.9227, GNorm = 0.7948, lr_0 = 7.8293e-04
Loss = 5.5369e-02, PNorm = 108.0051, GNorm = 0.4399, lr_0 = 7.8239e-04
Loss = 5.7454e-02, PNorm = 108.1020, GNorm = 0.8603, lr_0 = 7.8185e-04
Loss = 5.9687e-02, PNorm = 108.1815, GNorm = 0.6305, lr_0 = 7.8132e-04
Validation mae = 0.503854
Epoch 5
Loss = 4.3216e-02, PNorm = 108.2511, GNorm = 0.5460, lr_0 = 7.8078e-04
Loss = 3.9768e-02, PNorm = 108.3093, GNorm = 0.4415, lr_0 = 7.8025e-04
Loss = 4.0376e-02, PNorm = 108.3683, GNorm = 0.2547, lr_0 = 7.7971e-04
Loss = 3.5344e-02, PNorm = 108.4197, GNorm = 0.2641, lr_0 = 7.7918e-04
Loss = 3.4463e-02, PNorm = 108.4735, GNorm = 0.2162, lr_0 = 7.7864e-04
Loss = 3.2897e-02, PNorm = 108.5229, GNorm = 0.2934, lr_0 = 7.7811e-04
Loss = 3.7836e-02, PNorm = 108.5704, GNorm = 0.2981, lr_0 = 7.7758e-04
Loss = 3.3134e-02, PNorm = 108.6174, GNorm = 0.4033, lr_0 = 7.7705e-04
Loss = 3.3955e-02, PNorm = 108.6639, GNorm = 1.2999, lr_0 = 7.7651e-04
Loss = 3.6402e-02, PNorm = 108.7068, GNorm = 0.5228, lr_0 = 7.7598e-04
Loss = 3.5173e-02, PNorm = 108.7546, GNorm = 0.3556, lr_0 = 7.7545e-04
Loss = 3.9395e-02, PNorm = 108.8035, GNorm = 0.6734, lr_0 = 7.7492e-04
Loss = 3.7179e-02, PNorm = 108.8555, GNorm = 0.5712, lr_0 = 7.7439e-04
Loss = 3.6765e-02, PNorm = 108.9051, GNorm = 0.6231, lr_0 = 7.7386e-04
Loss = 4.4399e-02, PNorm = 108.9538, GNorm = 0.6534, lr_0 = 7.7333e-04
Loss = 3.6023e-02, PNorm = 109.0007, GNorm = 0.2605, lr_0 = 7.7280e-04
Loss = 4.2498e-02, PNorm = 109.0558, GNorm = 0.5883, lr_0 = 7.7227e-04
Loss = 3.6217e-02, PNorm = 109.1078, GNorm = 0.4021, lr_0 = 7.7174e-04
Loss = 3.3284e-02, PNorm = 109.1662, GNorm = 0.5009, lr_0 = 7.7121e-04
Loss = 3.8538e-02, PNorm = 109.2206, GNorm = 0.4358, lr_0 = 7.7068e-04
Loss = 3.5273e-02, PNorm = 109.2673, GNorm = 0.5849, lr_0 = 7.7015e-04
Loss = 3.3414e-02, PNorm = 109.3234, GNorm = 0.4162, lr_0 = 7.6963e-04
Loss = 3.5771e-02, PNorm = 109.3736, GNorm = 0.2436, lr_0 = 7.6910e-04
Loss = 3.6936e-02, PNorm = 109.4228, GNorm = 0.5944, lr_0 = 7.6857e-04
Loss = 3.3000e-02, PNorm = 109.4817, GNorm = 0.4368, lr_0 = 7.6805e-04
Loss = 3.2334e-02, PNorm = 109.5300, GNorm = 0.3418, lr_0 = 7.6752e-04
Loss = 3.5891e-02, PNorm = 109.5837, GNorm = 0.4239, lr_0 = 7.6699e-04
Loss = 3.5471e-02, PNorm = 109.6399, GNorm = 0.2872, lr_0 = 7.6647e-04
Loss = 3.0811e-02, PNorm = 109.6960, GNorm = 0.2573, lr_0 = 7.6594e-04
Loss = 3.8339e-02, PNorm = 109.7459, GNorm = 0.2814, lr_0 = 7.6542e-04
Loss = 3.1752e-02, PNorm = 109.8035, GNorm = 0.4232, lr_0 = 7.6489e-04
Loss = 3.3207e-02, PNorm = 109.8532, GNorm = 0.4582, lr_0 = 7.6437e-04
Loss = 3.1010e-02, PNorm = 109.8981, GNorm = 0.4032, lr_0 = 7.6385e-04
Loss = 3.9349e-02, PNorm = 109.9494, GNorm = 1.0718, lr_0 = 7.6332e-04
Loss = 3.3356e-02, PNorm = 109.9997, GNorm = 0.6930, lr_0 = 7.6280e-04
Loss = 3.1412e-02, PNorm = 110.0587, GNorm = 0.6011, lr_0 = 7.6228e-04
Loss = 3.0229e-02, PNorm = 110.1100, GNorm = 0.5409, lr_0 = 7.6176e-04
Loss = 3.8526e-02, PNorm = 110.1600, GNorm = 0.2546, lr_0 = 7.6123e-04
Loss = 4.1412e-02, PNorm = 110.2078, GNorm = 0.6147, lr_0 = 7.6071e-04
Loss = 3.7607e-02, PNorm = 110.2594, GNorm = 0.2621, lr_0 = 7.6019e-04
Loss = 3.8189e-02, PNorm = 110.3166, GNorm = 0.2918, lr_0 = 7.5967e-04
Loss = 3.5610e-02, PNorm = 110.3767, GNorm = 0.6666, lr_0 = 7.5915e-04
Loss = 3.0561e-02, PNorm = 110.4315, GNorm = 0.3194, lr_0 = 7.5863e-04
Loss = 3.3213e-02, PNorm = 110.4837, GNorm = 0.2293, lr_0 = 7.5811e-04
Loss = 3.8894e-02, PNorm = 110.5440, GNorm = 0.8045, lr_0 = 7.5759e-04
Loss = 3.5773e-02, PNorm = 110.6061, GNorm = 0.6774, lr_0 = 7.5707e-04
Loss = 3.2505e-02, PNorm = 110.6618, GNorm = 0.3269, lr_0 = 7.5655e-04
Loss = 3.7987e-02, PNorm = 110.7195, GNorm = 0.7299, lr_0 = 7.5603e-04
Loss = 3.4854e-02, PNorm = 110.7826, GNorm = 0.4564, lr_0 = 7.5552e-04
Loss = 3.0544e-02, PNorm = 110.8379, GNorm = 0.3728, lr_0 = 7.5500e-04
Loss = 3.5175e-02, PNorm = 110.9015, GNorm = 0.4438, lr_0 = 7.5448e-04
Loss = 3.6942e-02, PNorm = 110.9629, GNorm = 0.2997, lr_0 = 7.5397e-04
Loss = 3.6689e-02, PNorm = 111.0241, GNorm = 0.2862, lr_0 = 7.5345e-04
Loss = 3.1999e-02, PNorm = 111.0817, GNorm = 0.2600, lr_0 = 7.5293e-04
Loss = 3.1566e-02, PNorm = 111.1312, GNorm = 0.2314, lr_0 = 7.5242e-04
Loss = 3.3908e-02, PNorm = 111.1878, GNorm = 0.4365, lr_0 = 7.5190e-04
Loss = 3.6400e-02, PNorm = 111.2414, GNorm = 0.3054, lr_0 = 7.5139e-04
Loss = 3.2574e-02, PNorm = 111.2943, GNorm = 0.3451, lr_0 = 7.5087e-04
Loss = 3.7319e-02, PNorm = 111.3447, GNorm = 0.6183, lr_0 = 7.5036e-04
Loss = 3.4111e-02, PNorm = 111.4046, GNorm = 0.2870, lr_0 = 7.4984e-04
Loss = 3.5919e-02, PNorm = 111.4674, GNorm = 0.4665, lr_0 = 7.4933e-04
Loss = 4.0121e-02, PNorm = 111.5272, GNorm = 0.4453, lr_0 = 7.4882e-04
Loss = 3.2822e-02, PNorm = 111.5850, GNorm = 0.3452, lr_0 = 7.4830e-04
Loss = 3.3752e-02, PNorm = 111.6459, GNorm = 0.4288, lr_0 = 7.4779e-04
Loss = 3.5861e-02, PNorm = 111.7009, GNorm = 0.7109, lr_0 = 7.4728e-04
Loss = 3.4173e-02, PNorm = 111.7577, GNorm = 0.6007, lr_0 = 7.4677e-04
Loss = 3.4113e-02, PNorm = 111.8138, GNorm = 0.7071, lr_0 = 7.4625e-04
Loss = 3.9122e-02, PNorm = 111.8711, GNorm = 0.4604, lr_0 = 7.4574e-04
Loss = 3.3673e-02, PNorm = 111.9321, GNorm = 0.5108, lr_0 = 7.4523e-04
Loss = 3.3822e-02, PNorm = 111.9967, GNorm = 0.4615, lr_0 = 7.4472e-04
Loss = 3.4299e-02, PNorm = 112.0539, GNorm = 0.7684, lr_0 = 7.4421e-04
Loss = 3.9865e-02, PNorm = 112.1147, GNorm = 0.3082, lr_0 = 7.4370e-04
Loss = 3.3264e-02, PNorm = 112.1750, GNorm = 0.2828, lr_0 = 7.4319e-04
Loss = 3.0657e-02, PNorm = 112.2353, GNorm = 0.3633, lr_0 = 7.4268e-04
Loss = 3.5529e-02, PNorm = 112.2926, GNorm = 0.5007, lr_0 = 7.4217e-04
Loss = 3.7056e-02, PNorm = 112.3541, GNorm = 0.3462, lr_0 = 7.4167e-04
Loss = 3.9562e-02, PNorm = 112.4154, GNorm = 0.5788, lr_0 = 7.4116e-04
Loss = 3.6932e-02, PNorm = 112.4789, GNorm = 0.2739, lr_0 = 7.4065e-04
Loss = 3.6303e-02, PNorm = 112.5370, GNorm = 0.8271, lr_0 = 7.4014e-04
Loss = 4.3401e-02, PNorm = 112.6078, GNorm = 0.4077, lr_0 = 7.3964e-04
Loss = 3.9943e-02, PNorm = 112.6754, GNorm = 0.6748, lr_0 = 7.3913e-04
Loss = 3.6308e-02, PNorm = 112.7462, GNorm = 0.6111, lr_0 = 7.3862e-04
Loss = 4.2306e-02, PNorm = 112.8055, GNorm = 0.2497, lr_0 = 7.3812e-04
Loss = 3.8243e-02, PNorm = 112.8629, GNorm = 0.2144, lr_0 = 7.3761e-04
Loss = 4.3652e-02, PNorm = 112.9274, GNorm = 0.5161, lr_0 = 7.3711e-04
Loss = 3.5335e-02, PNorm = 112.9904, GNorm = 0.5157, lr_0 = 7.3660e-04
Loss = 4.2465e-02, PNorm = 113.0563, GNorm = 0.6068, lr_0 = 7.3610e-04
Loss = 3.7531e-02, PNorm = 113.1345, GNorm = 0.4399, lr_0 = 7.3559e-04
Loss = 3.1578e-02, PNorm = 113.2085, GNorm = 0.3813, lr_0 = 7.3509e-04
Loss = 3.4189e-02, PNorm = 113.2735, GNorm = 0.3169, lr_0 = 7.3458e-04
Loss = 3.1823e-02, PNorm = 113.3312, GNorm = 0.3915, lr_0 = 7.3408e-04
Loss = 4.7899e-02, PNorm = 113.3910, GNorm = 0.5453, lr_0 = 7.3358e-04
Loss = 3.7823e-02, PNorm = 113.4579, GNorm = 0.7023, lr_0 = 7.3308e-04
Loss = 4.0151e-02, PNorm = 113.5308, GNorm = 0.5805, lr_0 = 7.3257e-04
Loss = 3.8627e-02, PNorm = 113.6009, GNorm = 0.2905, lr_0 = 7.3207e-04
Loss = 4.3033e-02, PNorm = 113.6582, GNorm = 0.5471, lr_0 = 7.3157e-04
Loss = 3.5705e-02, PNorm = 113.7290, GNorm = 0.5264, lr_0 = 7.3107e-04
Loss = 4.9691e-02, PNorm = 113.7971, GNorm = 0.2989, lr_0 = 7.3057e-04
Loss = 3.4726e-02, PNorm = 113.8717, GNorm = 0.4167, lr_0 = 7.3007e-04
Loss = 4.1628e-02, PNorm = 113.9392, GNorm = 0.2804, lr_0 = 7.2957e-04
Loss = 4.2995e-02, PNorm = 114.0070, GNorm = 0.4072, lr_0 = 7.2907e-04
Loss = 3.6534e-02, PNorm = 114.0670, GNorm = 0.8404, lr_0 = 7.2857e-04
Loss = 3.8283e-02, PNorm = 114.1336, GNorm = 0.4162, lr_0 = 7.2807e-04
Loss = 3.3879e-02, PNorm = 114.1955, GNorm = 0.7133, lr_0 = 7.2757e-04
Loss = 3.9985e-02, PNorm = 114.2646, GNorm = 0.2943, lr_0 = 7.2707e-04
Loss = 3.8717e-02, PNorm = 114.3291, GNorm = 0.5647, lr_0 = 7.2657e-04
Loss = 3.7997e-02, PNorm = 114.3950, GNorm = 0.5680, lr_0 = 7.2608e-04
Loss = 3.6308e-02, PNorm = 114.4605, GNorm = 0.5474, lr_0 = 7.2558e-04
Loss = 4.0376e-02, PNorm = 114.5232, GNorm = 0.2974, lr_0 = 7.2508e-04
Loss = 3.6357e-02, PNorm = 114.5925, GNorm = 0.3180, lr_0 = 7.2458e-04
Loss = 4.4851e-02, PNorm = 114.6623, GNorm = 0.7245, lr_0 = 7.2409e-04
Loss = 4.2536e-02, PNorm = 114.7351, GNorm = 0.4909, lr_0 = 7.2359e-04
Loss = 3.9697e-02, PNorm = 114.8111, GNorm = 0.6621, lr_0 = 7.2310e-04
Loss = 3.7723e-02, PNorm = 114.8824, GNorm = 0.2411, lr_0 = 7.2260e-04
Loss = 4.2306e-02, PNorm = 114.9530, GNorm = 0.4881, lr_0 = 7.2211e-04
Loss = 3.4610e-02, PNorm = 115.0229, GNorm = 0.1985, lr_0 = 7.2161e-04
Loss = 4.0887e-02, PNorm = 115.0885, GNorm = 0.3175, lr_0 = 7.2112e-04
Loss = 4.3319e-02, PNorm = 115.1581, GNorm = 0.4400, lr_0 = 7.2062e-04
Loss = 4.9099e-02, PNorm = 115.2358, GNorm = 0.4966, lr_0 = 7.2013e-04
Loss = 3.4189e-02, PNorm = 115.3133, GNorm = 0.4642, lr_0 = 7.1964e-04
Validation mae = 0.498383
Epoch 6
Loss = 3.0208e-02, PNorm = 115.3709, GNorm = 0.2867, lr_0 = 7.1914e-04
Loss = 2.3818e-02, PNorm = 115.4244, GNorm = 0.3145, lr_0 = 7.1865e-04
Loss = 3.0360e-02, PNorm = 115.4650, GNorm = 0.3892, lr_0 = 7.1816e-04
Loss = 2.8315e-02, PNorm = 115.5133, GNorm = 0.2528, lr_0 = 7.1767e-04
Loss = 3.3239e-02, PNorm = 115.5604, GNorm = 0.3063, lr_0 = 7.1717e-04
Loss = 2.9778e-02, PNorm = 115.6068, GNorm = 0.2589, lr_0 = 7.1668e-04
Loss = 3.0456e-02, PNorm = 115.6524, GNorm = 0.8063, lr_0 = 7.1619e-04
Loss = 2.5322e-02, PNorm = 115.6974, GNorm = 0.3283, lr_0 = 7.1570e-04
Loss = 2.4950e-02, PNorm = 115.7408, GNorm = 0.3579, lr_0 = 7.1521e-04
Loss = 2.9516e-02, PNorm = 115.7835, GNorm = 0.4866, lr_0 = 7.1472e-04
Loss = 2.3604e-02, PNorm = 115.8226, GNorm = 0.3700, lr_0 = 7.1423e-04
Loss = 2.2438e-02, PNorm = 115.8629, GNorm = 0.3630, lr_0 = 7.1374e-04
Loss = 3.2289e-02, PNorm = 115.9103, GNorm = 0.7398, lr_0 = 7.1325e-04
Loss = 3.0737e-02, PNorm = 115.9533, GNorm = 0.3905, lr_0 = 7.1277e-04
Loss = 2.6703e-02, PNorm = 116.0059, GNorm = 0.2489, lr_0 = 7.1228e-04
Loss = 2.5288e-02, PNorm = 116.0512, GNorm = 0.2845, lr_0 = 7.1179e-04
Loss = 2.4980e-02, PNorm = 116.0954, GNorm = 0.2510, lr_0 = 7.1130e-04
Loss = 2.7237e-02, PNorm = 116.1381, GNorm = 0.3606, lr_0 = 7.1081e-04
Loss = 2.5433e-02, PNorm = 116.1801, GNorm = 0.1970, lr_0 = 7.1033e-04
Loss = 2.9325e-02, PNorm = 116.2198, GNorm = 0.3452, lr_0 = 7.0984e-04
Loss = 2.6177e-02, PNorm = 116.2669, GNorm = 0.3062, lr_0 = 7.0935e-04
Loss = 3.0920e-02, PNorm = 116.3093, GNorm = 0.1953, lr_0 = 7.0887e-04
Loss = 2.6569e-02, PNorm = 116.3562, GNorm = 0.9638, lr_0 = 7.0838e-04
Loss = 2.4542e-02, PNorm = 116.4004, GNorm = 0.3938, lr_0 = 7.0790e-04
Loss = 2.3858e-02, PNorm = 116.4412, GNorm = 0.4805, lr_0 = 7.0741e-04
Loss = 2.8160e-02, PNorm = 116.4894, GNorm = 0.4084, lr_0 = 7.0693e-04
Loss = 2.5358e-02, PNorm = 116.5410, GNorm = 0.5854, lr_0 = 7.0644e-04
Loss = 2.7385e-02, PNorm = 116.5857, GNorm = 0.4009, lr_0 = 7.0596e-04
Loss = 3.0992e-02, PNorm = 116.6337, GNorm = 0.4845, lr_0 = 7.0548e-04
Loss = 2.6298e-02, PNorm = 116.6732, GNorm = 0.7396, lr_0 = 7.0499e-04
Loss = 2.4123e-02, PNorm = 116.7158, GNorm = 0.8695, lr_0 = 7.0451e-04
Loss = 3.0023e-02, PNorm = 116.7626, GNorm = 0.3822, lr_0 = 7.0403e-04
Loss = 3.0421e-02, PNorm = 116.8117, GNorm = 0.3796, lr_0 = 7.0354e-04
Loss = 2.9399e-02, PNorm = 116.8603, GNorm = 0.6962, lr_0 = 7.0306e-04
Loss = 2.6096e-02, PNorm = 116.9150, GNorm = 0.3134, lr_0 = 7.0258e-04
Loss = 2.8889e-02, PNorm = 116.9652, GNorm = 0.6856, lr_0 = 7.0210e-04
Loss = 2.8211e-02, PNorm = 117.0156, GNorm = 0.5575, lr_0 = 7.0162e-04
Loss = 2.5078e-02, PNorm = 117.0663, GNorm = 0.5423, lr_0 = 7.0114e-04
Loss = 2.5946e-02, PNorm = 117.1135, GNorm = 0.6038, lr_0 = 7.0066e-04
Loss = 2.5202e-02, PNorm = 117.1603, GNorm = 0.5508, lr_0 = 7.0018e-04
Loss = 2.8106e-02, PNorm = 117.2018, GNorm = 0.2803, lr_0 = 6.9970e-04
Loss = 3.2177e-02, PNorm = 117.2488, GNorm = 0.6927, lr_0 = 6.9922e-04
Loss = 2.9063e-02, PNorm = 117.2923, GNorm = 0.2400, lr_0 = 6.9874e-04
Loss = 2.9036e-02, PNorm = 117.3356, GNorm = 0.5878, lr_0 = 6.9826e-04
Loss = 2.8214e-02, PNorm = 117.3818, GNorm = 0.4861, lr_0 = 6.9778e-04
Loss = 2.6230e-02, PNorm = 117.4299, GNorm = 0.8431, lr_0 = 6.9730e-04
Loss = 2.2755e-02, PNorm = 117.4803, GNorm = 0.4899, lr_0 = 6.9683e-04
Loss = 2.5981e-02, PNorm = 117.5320, GNorm = 0.6815, lr_0 = 6.9635e-04
Loss = 2.5656e-02, PNorm = 117.5910, GNorm = 0.6162, lr_0 = 6.9587e-04
Loss = 2.7446e-02, PNorm = 117.6426, GNorm = 0.5150, lr_0 = 6.9540e-04
Loss = 2.6264e-02, PNorm = 117.6868, GNorm = 0.3276, lr_0 = 6.9492e-04
Loss = 3.0655e-02, PNorm = 117.7362, GNorm = 0.5773, lr_0 = 6.9444e-04
Loss = 2.5169e-02, PNorm = 117.7852, GNorm = 0.3379, lr_0 = 6.9397e-04
Loss = 2.5492e-02, PNorm = 117.8426, GNorm = 0.1690, lr_0 = 6.9349e-04
Loss = 2.7796e-02, PNorm = 117.8906, GNorm = 0.4830, lr_0 = 6.9302e-04
Loss = 3.0042e-02, PNorm = 117.9369, GNorm = 0.4660, lr_0 = 6.9254e-04
Loss = 2.7781e-02, PNorm = 117.9830, GNorm = 0.2267, lr_0 = 6.9207e-04
Loss = 2.9202e-02, PNorm = 118.0336, GNorm = 0.3817, lr_0 = 6.9159e-04
Loss = 2.8858e-02, PNorm = 118.0792, GNorm = 0.2926, lr_0 = 6.9112e-04
Loss = 2.7193e-02, PNorm = 118.1246, GNorm = 0.6140, lr_0 = 6.9065e-04
Loss = 2.8629e-02, PNorm = 118.1750, GNorm = 0.2393, lr_0 = 6.9017e-04
Loss = 2.7531e-02, PNorm = 118.2339, GNorm = 0.4651, lr_0 = 6.8970e-04
Loss = 2.6270e-02, PNorm = 118.2931, GNorm = 0.6415, lr_0 = 6.8923e-04
Loss = 2.5336e-02, PNorm = 118.3497, GNorm = 0.6807, lr_0 = 6.8876e-04
Loss = 2.8018e-02, PNorm = 118.4027, GNorm = 0.1918, lr_0 = 6.8828e-04
Loss = 2.6800e-02, PNorm = 118.4572, GNorm = 0.6491, lr_0 = 6.8781e-04
Loss = 2.8453e-02, PNorm = 118.5106, GNorm = 0.4667, lr_0 = 6.8734e-04
Loss = 2.9405e-02, PNorm = 118.5647, GNorm = 0.5282, lr_0 = 6.8687e-04
Loss = 2.6651e-02, PNorm = 118.6185, GNorm = 0.3293, lr_0 = 6.8640e-04
Loss = 2.9794e-02, PNorm = 118.6690, GNorm = 0.4826, lr_0 = 6.8593e-04
Loss = 2.9501e-02, PNorm = 118.7238, GNorm = 0.3081, lr_0 = 6.8546e-04
Loss = 2.8548e-02, PNorm = 118.7817, GNorm = 0.2462, lr_0 = 6.8499e-04
Loss = 2.7797e-02, PNorm = 118.8408, GNorm = 0.3542, lr_0 = 6.8452e-04
Loss = 3.0009e-02, PNorm = 118.8956, GNorm = 0.9025, lr_0 = 6.8405e-04
Loss = 3.4545e-02, PNorm = 118.9606, GNorm = 1.0172, lr_0 = 6.8358e-04
Loss = 2.4885e-02, PNorm = 119.0177, GNorm = 0.1713, lr_0 = 6.8312e-04
Loss = 3.2163e-02, PNorm = 119.0684, GNorm = 0.5818, lr_0 = 6.8265e-04
Loss = 3.1056e-02, PNorm = 119.1260, GNorm = 0.3137, lr_0 = 6.8218e-04
Loss = 2.8414e-02, PNorm = 119.1898, GNorm = 0.4957, lr_0 = 6.8171e-04
Loss = 3.2039e-02, PNorm = 119.2553, GNorm = 0.5255, lr_0 = 6.8125e-04
Loss = 3.2376e-02, PNorm = 119.3164, GNorm = 0.6147, lr_0 = 6.8078e-04
Loss = 3.2862e-02, PNorm = 119.3684, GNorm = 0.7293, lr_0 = 6.8031e-04
Loss = 3.0274e-02, PNorm = 119.4328, GNorm = 0.2299, lr_0 = 6.7985e-04
Loss = 2.6002e-02, PNorm = 119.4927, GNorm = 0.7477, lr_0 = 6.7938e-04
Loss = 2.8052e-02, PNorm = 119.5523, GNorm = 0.3220, lr_0 = 6.7892e-04
Loss = 3.4107e-02, PNorm = 119.6121, GNorm = 0.6288, lr_0 = 6.7845e-04
Loss = 2.6592e-02, PNorm = 119.6691, GNorm = 0.2622, lr_0 = 6.7799e-04
Loss = 2.9904e-02, PNorm = 119.7300, GNorm = 0.5317, lr_0 = 6.7752e-04
Loss = 2.9563e-02, PNorm = 119.7836, GNorm = 0.4684, lr_0 = 6.7706e-04
Loss = 2.9284e-02, PNorm = 119.8429, GNorm = 0.2559, lr_0 = 6.7659e-04
Loss = 3.0671e-02, PNorm = 119.8965, GNorm = 0.2543, lr_0 = 6.7613e-04
Loss = 2.7150e-02, PNorm = 119.9523, GNorm = 0.3059, lr_0 = 6.7567e-04
Loss = 2.8927e-02, PNorm = 120.0103, GNorm = 0.1960, lr_0 = 6.7520e-04
Loss = 2.9785e-02, PNorm = 120.0734, GNorm = 0.3636, lr_0 = 6.7474e-04
Loss = 2.9022e-02, PNorm = 120.1330, GNorm = 0.3767, lr_0 = 6.7428e-04
Loss = 2.9523e-02, PNorm = 120.1971, GNorm = 0.2842, lr_0 = 6.7382e-04
Loss = 2.6477e-02, PNorm = 120.2617, GNorm = 0.3986, lr_0 = 6.7335e-04
Loss = 3.0638e-02, PNorm = 120.3222, GNorm = 0.6777, lr_0 = 6.7289e-04
Loss = 4.2397e-02, PNorm = 120.3864, GNorm = 0.6982, lr_0 = 6.7243e-04
Loss = 3.3463e-02, PNorm = 120.4429, GNorm = 0.3607, lr_0 = 6.7197e-04
Loss = 3.2744e-02, PNorm = 120.5072, GNorm = 0.5184, lr_0 = 6.7151e-04
Loss = 3.1583e-02, PNorm = 120.5761, GNorm = 0.3919, lr_0 = 6.7105e-04
Loss = 3.0253e-02, PNorm = 120.6376, GNorm = 0.4628, lr_0 = 6.7059e-04
Loss = 3.3566e-02, PNorm = 120.6957, GNorm = 1.0206, lr_0 = 6.7013e-04
Loss = 3.6910e-02, PNorm = 120.7631, GNorm = 0.4166, lr_0 = 6.6967e-04
Loss = 3.1726e-02, PNorm = 120.8362, GNorm = 0.7462, lr_0 = 6.6921e-04
Loss = 3.5267e-02, PNorm = 120.9019, GNorm = 0.8673, lr_0 = 6.6876e-04
Loss = 3.1543e-02, PNorm = 120.9667, GNorm = 0.3734, lr_0 = 6.6830e-04
Loss = 3.1225e-02, PNorm = 121.0271, GNorm = 0.6472, lr_0 = 6.6784e-04
Loss = 3.1972e-02, PNorm = 121.0918, GNorm = 0.6682, lr_0 = 6.6738e-04
Loss = 2.7333e-02, PNorm = 121.1550, GNorm = 0.2435, lr_0 = 6.6693e-04
Loss = 2.9714e-02, PNorm = 121.2142, GNorm = 0.4705, lr_0 = 6.6647e-04
Loss = 2.9917e-02, PNorm = 121.2786, GNorm = 0.2825, lr_0 = 6.6601e-04
Loss = 2.8949e-02, PNorm = 121.3398, GNorm = 0.3570, lr_0 = 6.6556e-04
Loss = 2.7062e-02, PNorm = 121.3923, GNorm = 0.4031, lr_0 = 6.6510e-04
Loss = 3.0137e-02, PNorm = 121.4521, GNorm = 0.3049, lr_0 = 6.6464e-04
Loss = 3.2385e-02, PNorm = 121.5179, GNorm = 0.3106, lr_0 = 6.6419e-04
Loss = 3.4568e-02, PNorm = 121.5751, GNorm = 0.3971, lr_0 = 6.6373e-04
Loss = 3.6888e-02, PNorm = 121.6367, GNorm = 0.2813, lr_0 = 6.6328e-04
Loss = 3.1041e-02, PNorm = 121.6952, GNorm = 0.3116, lr_0 = 6.6282e-04
Validation mae = 0.490954
Epoch 7
Loss = 2.6799e-02, PNorm = 121.7488, GNorm = 0.3480, lr_0 = 6.6237e-04
Loss = 2.1677e-02, PNorm = 121.7890, GNorm = 0.2973, lr_0 = 6.6192e-04
Loss = 2.7183e-02, PNorm = 121.8324, GNorm = 0.2577, lr_0 = 6.6146e-04
Loss = 2.3495e-02, PNorm = 121.8722, GNorm = 0.2866, lr_0 = 6.6101e-04
Loss = 2.1130e-02, PNorm = 121.9034, GNorm = 0.4220, lr_0 = 6.6056e-04
Loss = 2.2299e-02, PNorm = 121.9386, GNorm = 0.1732, lr_0 = 6.6011e-04
Loss = 2.0397e-02, PNorm = 121.9771, GNorm = 0.4122, lr_0 = 6.5965e-04
Loss = 1.9982e-02, PNorm = 122.0131, GNorm = 0.5278, lr_0 = 6.5920e-04
Loss = 1.9971e-02, PNorm = 122.0500, GNorm = 0.2131, lr_0 = 6.5875e-04
Loss = 2.3088e-02, PNorm = 122.0879, GNorm = 0.3522, lr_0 = 6.5830e-04
Loss = 2.2907e-02, PNorm = 122.1281, GNorm = 0.2221, lr_0 = 6.5785e-04
Loss = 2.3651e-02, PNorm = 122.1701, GNorm = 0.5894, lr_0 = 6.5740e-04
Loss = 2.1777e-02, PNorm = 122.2155, GNorm = 0.1920, lr_0 = 6.5695e-04
Loss = 2.4348e-02, PNorm = 122.2543, GNorm = 0.3758, lr_0 = 6.5650e-04
Loss = 2.1363e-02, PNorm = 122.2888, GNorm = 0.3752, lr_0 = 6.5605e-04
Loss = 2.0612e-02, PNorm = 122.3260, GNorm = 0.2671, lr_0 = 6.5560e-04
Loss = 2.0677e-02, PNorm = 122.3684, GNorm = 0.4663, lr_0 = 6.5515e-04
Loss = 2.3323e-02, PNorm = 122.4036, GNorm = 0.5166, lr_0 = 6.5470e-04
Loss = 2.3941e-02, PNorm = 122.4357, GNorm = 0.4999, lr_0 = 6.5425e-04
Loss = 2.3600e-02, PNorm = 122.4765, GNorm = 0.2405, lr_0 = 6.5380e-04
Loss = 2.3681e-02, PNorm = 122.5201, GNorm = 0.3662, lr_0 = 6.5335e-04
Loss = 1.9829e-02, PNorm = 122.5642, GNorm = 0.3906, lr_0 = 6.5291e-04
Loss = 1.8267e-02, PNorm = 122.6050, GNorm = 0.3111, lr_0 = 6.5246e-04
Loss = 2.3262e-02, PNorm = 122.6362, GNorm = 0.3621, lr_0 = 6.5201e-04
Loss = 2.2416e-02, PNorm = 122.6721, GNorm = 0.5402, lr_0 = 6.5157e-04
Loss = 2.3399e-02, PNorm = 122.7020, GNorm = 0.4240, lr_0 = 6.5112e-04
Loss = 2.2247e-02, PNorm = 122.7387, GNorm = 0.2827, lr_0 = 6.5067e-04
Loss = 2.5463e-02, PNorm = 122.7790, GNorm = 0.4247, lr_0 = 6.5023e-04
Loss = 2.0103e-02, PNorm = 122.8240, GNorm = 0.3219, lr_0 = 6.4978e-04
Loss = 3.0012e-02, PNorm = 122.8640, GNorm = 0.9987, lr_0 = 6.4934e-04
Loss = 2.1460e-02, PNorm = 122.9075, GNorm = 0.5216, lr_0 = 6.4889e-04
Loss = 2.2469e-02, PNorm = 122.9522, GNorm = 0.4116, lr_0 = 6.4845e-04
Loss = 2.2352e-02, PNorm = 122.9945, GNorm = 0.3324, lr_0 = 6.4800e-04
Loss = 1.9771e-02, PNorm = 123.0302, GNorm = 0.6584, lr_0 = 6.4756e-04
Loss = 1.9621e-02, PNorm = 123.0717, GNorm = 0.2296, lr_0 = 6.4712e-04
Loss = 2.5570e-02, PNorm = 123.1155, GNorm = 0.6341, lr_0 = 6.4667e-04
Loss = 2.3111e-02, PNorm = 123.1554, GNorm = 0.3171, lr_0 = 6.4623e-04
Loss = 2.0730e-02, PNorm = 123.1895, GNorm = 0.2438, lr_0 = 6.4579e-04
Loss = 2.6428e-02, PNorm = 123.2271, GNorm = 0.4816, lr_0 = 6.4534e-04
Loss = 2.2436e-02, PNorm = 123.2723, GNorm = 0.4259, lr_0 = 6.4490e-04
Loss = 2.0736e-02, PNorm = 123.3178, GNorm = 0.2543, lr_0 = 6.4446e-04
Loss = 2.0841e-02, PNorm = 123.3624, GNorm = 0.4779, lr_0 = 6.4402e-04
Loss = 2.0784e-02, PNorm = 123.4022, GNorm = 0.6681, lr_0 = 6.4358e-04
Loss = 2.2402e-02, PNorm = 123.4493, GNorm = 0.1646, lr_0 = 6.4314e-04
Loss = 2.2056e-02, PNorm = 123.4931, GNorm = 0.2389, lr_0 = 6.4270e-04
Loss = 2.4774e-02, PNorm = 123.5354, GNorm = 0.5128, lr_0 = 6.4226e-04
Loss = 2.1582e-02, PNorm = 123.5756, GNorm = 0.3502, lr_0 = 6.4182e-04
Loss = 2.0616e-02, PNorm = 123.6196, GNorm = 0.4061, lr_0 = 6.4138e-04
Loss = 1.9399e-02, PNorm = 123.6615, GNorm = 0.3975, lr_0 = 6.4094e-04
Loss = 2.1731e-02, PNorm = 123.7028, GNorm = 0.1902, lr_0 = 6.4050e-04
Loss = 2.2436e-02, PNorm = 123.7388, GNorm = 0.3574, lr_0 = 6.4006e-04
Loss = 2.4164e-02, PNorm = 123.7778, GNorm = 0.3181, lr_0 = 6.3962e-04
Loss = 2.0941e-02, PNorm = 123.8248, GNorm = 0.5332, lr_0 = 6.3918e-04
Loss = 2.6841e-02, PNorm = 123.8689, GNorm = 0.3972, lr_0 = 6.3874e-04
Loss = 2.3996e-02, PNorm = 123.9115, GNorm = 0.5435, lr_0 = 6.3831e-04
Loss = 2.0084e-02, PNorm = 123.9579, GNorm = 0.2056, lr_0 = 6.3787e-04
Loss = 2.2524e-02, PNorm = 124.0033, GNorm = 0.1892, lr_0 = 6.3743e-04
Loss = 2.2234e-02, PNorm = 124.0444, GNorm = 0.6662, lr_0 = 6.3700e-04
Loss = 2.0501e-02, PNorm = 124.0846, GNorm = 0.6253, lr_0 = 6.3656e-04
Loss = 2.2016e-02, PNorm = 124.1391, GNorm = 0.3959, lr_0 = 6.3612e-04
Loss = 1.9356e-02, PNorm = 124.1834, GNorm = 0.2178, lr_0 = 6.3569e-04
Loss = 2.4813e-02, PNorm = 124.2247, GNorm = 0.2947, lr_0 = 6.3525e-04
Loss = 2.3338e-02, PNorm = 124.2644, GNorm = 0.3056, lr_0 = 6.3482e-04
Loss = 2.3644e-02, PNorm = 124.3123, GNorm = 0.5124, lr_0 = 6.3438e-04
Loss = 2.2824e-02, PNorm = 124.3619, GNorm = 0.6297, lr_0 = 6.3395e-04
Loss = 2.4754e-02, PNorm = 124.4088, GNorm = 0.4465, lr_0 = 6.3351e-04
Loss = 2.3025e-02, PNorm = 124.4559, GNorm = 0.4062, lr_0 = 6.3308e-04
Loss = 2.1030e-02, PNorm = 124.5028, GNorm = 0.1805, lr_0 = 6.3265e-04
Loss = 2.6565e-02, PNorm = 124.5472, GNorm = 0.3478, lr_0 = 6.3221e-04
Loss = 2.1896e-02, PNorm = 124.5930, GNorm = 0.3601, lr_0 = 6.3178e-04
Loss = 2.4366e-02, PNorm = 124.6388, GNorm = 0.4161, lr_0 = 6.3135e-04
Loss = 2.5210e-02, PNorm = 124.6967, GNorm = 0.5371, lr_0 = 6.3091e-04
Loss = 2.5242e-02, PNorm = 124.7550, GNorm = 0.6274, lr_0 = 6.3048e-04
Loss = 2.3748e-02, PNorm = 124.8067, GNorm = 0.2873, lr_0 = 6.3005e-04
Loss = 2.0227e-02, PNorm = 124.8514, GNorm = 0.2296, lr_0 = 6.2962e-04
Loss = 2.0559e-02, PNorm = 124.9062, GNorm = 0.4977, lr_0 = 6.2919e-04
Loss = 2.3257e-02, PNorm = 124.9553, GNorm = 0.5507, lr_0 = 6.2876e-04
Loss = 2.5841e-02, PNorm = 125.0000, GNorm = 0.4411, lr_0 = 6.2833e-04
Loss = 2.3131e-02, PNorm = 125.0501, GNorm = 0.3015, lr_0 = 6.2789e-04
Loss = 2.3432e-02, PNorm = 125.1003, GNorm = 0.1897, lr_0 = 6.2746e-04
Loss = 2.4340e-02, PNorm = 125.1523, GNorm = 0.2567, lr_0 = 6.2703e-04
Loss = 2.0407e-02, PNorm = 125.2006, GNorm = 0.2578, lr_0 = 6.2661e-04
Loss = 2.3596e-02, PNorm = 125.2502, GNorm = 0.2786, lr_0 = 6.2618e-04
Loss = 2.1472e-02, PNorm = 125.3025, GNorm = 0.1780, lr_0 = 6.2575e-04
Loss = 2.0951e-02, PNorm = 125.3485, GNorm = 0.2105, lr_0 = 6.2532e-04
Loss = 2.0079e-02, PNorm = 125.3984, GNorm = 0.2390, lr_0 = 6.2489e-04
Loss = 2.4520e-02, PNorm = 125.4441, GNorm = 0.4513, lr_0 = 6.2446e-04
Loss = 2.1497e-02, PNorm = 125.4875, GNorm = 0.3411, lr_0 = 6.2403e-04
Loss = 2.5368e-02, PNorm = 125.5344, GNorm = 0.3482, lr_0 = 6.2361e-04
Loss = 2.2943e-02, PNorm = 125.5798, GNorm = 0.4607, lr_0 = 6.2318e-04
Loss = 2.3383e-02, PNorm = 125.6353, GNorm = 0.4713, lr_0 = 6.2275e-04
Loss = 2.4496e-02, PNorm = 125.6925, GNorm = 0.6721, lr_0 = 6.2233e-04
Loss = 2.9436e-02, PNorm = 125.7514, GNorm = 0.2516, lr_0 = 6.2190e-04
Loss = 2.4289e-02, PNorm = 125.8055, GNorm = 0.5596, lr_0 = 6.2147e-04
Loss = 2.3521e-02, PNorm = 125.8608, GNorm = 0.2453, lr_0 = 6.2105e-04
Loss = 2.4559e-02, PNorm = 125.9070, GNorm = 0.4523, lr_0 = 6.2062e-04
Loss = 1.9727e-02, PNorm = 125.9510, GNorm = 0.2525, lr_0 = 6.2020e-04
Loss = 2.3579e-02, PNorm = 126.0023, GNorm = 0.6228, lr_0 = 6.1977e-04
Loss = 2.7657e-02, PNorm = 126.0531, GNorm = 0.3591, lr_0 = 6.1935e-04
Loss = 2.3555e-02, PNorm = 126.1064, GNorm = 0.5937, lr_0 = 6.1892e-04
Loss = 2.2249e-02, PNorm = 126.1531, GNorm = 0.3408, lr_0 = 6.1850e-04
Loss = 2.1442e-02, PNorm = 126.1954, GNorm = 0.7349, lr_0 = 6.1808e-04
Loss = 2.6138e-02, PNorm = 126.2368, GNorm = 0.2878, lr_0 = 6.1765e-04
Loss = 2.7919e-02, PNorm = 126.2854, GNorm = 0.4187, lr_0 = 6.1723e-04
Loss = 2.2045e-02, PNorm = 126.3392, GNorm = 0.1954, lr_0 = 6.1681e-04
Loss = 2.5117e-02, PNorm = 126.3945, GNorm = 0.2770, lr_0 = 6.1638e-04
Loss = 2.5232e-02, PNorm = 126.4506, GNorm = 0.4636, lr_0 = 6.1596e-04
Loss = 3.0659e-02, PNorm = 126.5017, GNorm = 0.3816, lr_0 = 6.1554e-04
Loss = 2.2817e-02, PNorm = 126.5514, GNorm = 0.4447, lr_0 = 6.1512e-04
Loss = 2.4192e-02, PNorm = 126.6070, GNorm = 0.5585, lr_0 = 6.1470e-04
Loss = 1.9916e-02, PNorm = 126.6615, GNorm = 0.2416, lr_0 = 6.1428e-04
Loss = 1.9354e-02, PNorm = 126.7043, GNorm = 0.2077, lr_0 = 6.1385e-04
Loss = 2.3847e-02, PNorm = 126.7520, GNorm = 0.4452, lr_0 = 6.1343e-04
Loss = 2.9292e-02, PNorm = 126.7978, GNorm = 0.4978, lr_0 = 6.1301e-04
Loss = 2.1951e-02, PNorm = 126.8491, GNorm = 0.3050, lr_0 = 6.1259e-04
Loss = 2.1748e-02, PNorm = 126.9047, GNorm = 0.5895, lr_0 = 6.1217e-04
Loss = 2.3693e-02, PNorm = 126.9540, GNorm = 0.2736, lr_0 = 6.1175e-04
Loss = 2.6401e-02, PNorm = 127.0114, GNorm = 0.3233, lr_0 = 6.1134e-04
Loss = 2.3471e-02, PNorm = 127.0629, GNorm = 0.3449, lr_0 = 6.1092e-04
Loss = 2.5655e-02, PNorm = 127.1156, GNorm = 0.3172, lr_0 = 6.1050e-04
Validation mae = 0.486398
Epoch 8
Loss = 1.9194e-02, PNorm = 127.1606, GNorm = 0.1530, lr_0 = 6.1008e-04
Loss = 1.9104e-02, PNorm = 127.1987, GNorm = 0.4942, lr_0 = 6.0966e-04
Loss = 2.0526e-02, PNorm = 127.2347, GNorm = 0.5485, lr_0 = 6.0924e-04
Loss = 1.6732e-02, PNorm = 127.2701, GNorm = 0.2589, lr_0 = 6.0883e-04
Loss = 1.8936e-02, PNorm = 127.3035, GNorm = 0.1324, lr_0 = 6.0841e-04
Loss = 1.9278e-02, PNorm = 127.3383, GNorm = 0.6192, lr_0 = 6.0799e-04
Loss = 1.8613e-02, PNorm = 127.3708, GNorm = 0.2856, lr_0 = 6.0758e-04
Loss = 1.7955e-02, PNorm = 127.4088, GNorm = 0.1421, lr_0 = 6.0716e-04
Loss = 1.7538e-02, PNorm = 127.4463, GNorm = 0.4143, lr_0 = 6.0674e-04
Loss = 2.0298e-02, PNorm = 127.4753, GNorm = 0.1709, lr_0 = 6.0633e-04
Loss = 1.6190e-02, PNorm = 127.5095, GNorm = 0.5444, lr_0 = 6.0591e-04
Loss = 1.8344e-02, PNorm = 127.5392, GNorm = 0.2182, lr_0 = 6.0550e-04
Loss = 1.6454e-02, PNorm = 127.5682, GNorm = 0.3281, lr_0 = 6.0508e-04
Loss = 1.9561e-02, PNorm = 127.6014, GNorm = 0.1341, lr_0 = 6.0467e-04
Loss = 1.8917e-02, PNorm = 127.6396, GNorm = 0.4614, lr_0 = 6.0425e-04
Loss = 1.7630e-02, PNorm = 127.6712, GNorm = 0.2489, lr_0 = 6.0384e-04
Loss = 1.8839e-02, PNorm = 127.7042, GNorm = 0.2560, lr_0 = 6.0343e-04
Loss = 1.8419e-02, PNorm = 127.7416, GNorm = 0.1380, lr_0 = 6.0301e-04
Loss = 1.7562e-02, PNorm = 127.7787, GNorm = 0.2287, lr_0 = 6.0260e-04
Loss = 1.5837e-02, PNorm = 127.8140, GNorm = 0.4519, lr_0 = 6.0219e-04
Loss = 2.1310e-02, PNorm = 127.8481, GNorm = 0.2090, lr_0 = 6.0178e-04
Loss = 1.9884e-02, PNorm = 127.8815, GNorm = 0.5480, lr_0 = 6.0136e-04
Loss = 1.8052e-02, PNorm = 127.9206, GNorm = 0.2316, lr_0 = 6.0095e-04
Loss = 1.8986e-02, PNorm = 127.9555, GNorm = 0.1727, lr_0 = 6.0054e-04
Loss = 1.7497e-02, PNorm = 127.9931, GNorm = 0.3026, lr_0 = 6.0013e-04
Loss = 2.1135e-02, PNorm = 128.0211, GNorm = 0.1990, lr_0 = 5.9972e-04
Loss = 1.8416e-02, PNorm = 128.0535, GNorm = 0.2943, lr_0 = 5.9931e-04
Loss = 1.9060e-02, PNorm = 128.0920, GNorm = 0.2947, lr_0 = 5.9890e-04
Loss = 1.4915e-02, PNorm = 128.1323, GNorm = 0.1923, lr_0 = 5.9849e-04
Loss = 1.8223e-02, PNorm = 128.1628, GNorm = 0.2513, lr_0 = 5.9808e-04
Loss = 1.7421e-02, PNorm = 128.2035, GNorm = 0.4750, lr_0 = 5.9767e-04
Loss = 2.2191e-02, PNorm = 128.2440, GNorm = 0.2801, lr_0 = 5.9726e-04
Loss = 1.7979e-02, PNorm = 128.2840, GNorm = 0.3078, lr_0 = 5.9685e-04
Loss = 1.9463e-02, PNorm = 128.3130, GNorm = 0.5707, lr_0 = 5.9644e-04
Loss = 1.9338e-02, PNorm = 128.3481, GNorm = 0.3711, lr_0 = 5.9603e-04
Loss = 1.7794e-02, PNorm = 128.3861, GNorm = 0.4369, lr_0 = 5.9562e-04
Loss = 1.7530e-02, PNorm = 128.4233, GNorm = 0.3365, lr_0 = 5.9521e-04
Loss = 1.5773e-02, PNorm = 128.4553, GNorm = 0.1652, lr_0 = 5.9481e-04
Loss = 1.6321e-02, PNorm = 128.4933, GNorm = 0.2719, lr_0 = 5.9440e-04
Loss = 1.8537e-02, PNorm = 128.5313, GNorm = 0.2663, lr_0 = 5.9399e-04
Loss = 1.8500e-02, PNorm = 128.5717, GNorm = 0.4446, lr_0 = 5.9358e-04
Loss = 1.5052e-02, PNorm = 128.6125, GNorm = 0.1920, lr_0 = 5.9318e-04
Loss = 1.6512e-02, PNorm = 128.6479, GNorm = 0.3805, lr_0 = 5.9277e-04
Loss = 2.0745e-02, PNorm = 128.6800, GNorm = 0.3469, lr_0 = 5.9236e-04
Loss = 1.7376e-02, PNorm = 128.7190, GNorm = 0.2589, lr_0 = 5.9196e-04
Loss = 1.7334e-02, PNorm = 128.7549, GNorm = 0.2162, lr_0 = 5.9155e-04
Loss = 1.8841e-02, PNorm = 128.7925, GNorm = 0.4010, lr_0 = 5.9115e-04
Loss = 1.4704e-02, PNorm = 128.8219, GNorm = 0.1395, lr_0 = 5.9074e-04
Loss = 1.8454e-02, PNorm = 128.8517, GNorm = 0.2185, lr_0 = 5.9034e-04
Loss = 1.8621e-02, PNorm = 128.8920, GNorm = 0.4839, lr_0 = 5.8993e-04
Loss = 2.2351e-02, PNorm = 128.9356, GNorm = 0.6127, lr_0 = 5.8953e-04
Loss = 1.8322e-02, PNorm = 128.9827, GNorm = 0.4414, lr_0 = 5.8913e-04
Loss = 1.9063e-02, PNorm = 129.0185, GNorm = 0.1926, lr_0 = 5.8872e-04
Loss = 1.6123e-02, PNorm = 129.0574, GNorm = 0.3736, lr_0 = 5.8832e-04
Loss = 1.6366e-02, PNorm = 129.0960, GNorm = 0.3946, lr_0 = 5.8792e-04
Loss = 1.4753e-02, PNorm = 129.1340, GNorm = 0.5773, lr_0 = 5.8751e-04
Loss = 1.6826e-02, PNorm = 129.1692, GNorm = 0.3614, lr_0 = 5.8711e-04
Loss = 1.6702e-02, PNorm = 129.2059, GNorm = 0.1950, lr_0 = 5.8671e-04
Loss = 1.8499e-02, PNorm = 129.2457, GNorm = 0.6798, lr_0 = 5.8631e-04
Loss = 1.9397e-02, PNorm = 129.2817, GNorm = 0.1312, lr_0 = 5.8591e-04
Loss = 1.7616e-02, PNorm = 129.3189, GNorm = 0.3676, lr_0 = 5.8550e-04
Loss = 1.8389e-02, PNorm = 129.3582, GNorm = 0.1327, lr_0 = 5.8510e-04
Loss = 1.8626e-02, PNorm = 129.3995, GNorm = 0.4522, lr_0 = 5.8470e-04
Loss = 1.9868e-02, PNorm = 129.4368, GNorm = 0.3468, lr_0 = 5.8430e-04
Loss = 1.5693e-02, PNorm = 129.4770, GNorm = 0.5822, lr_0 = 5.8390e-04
Loss = 1.7600e-02, PNorm = 129.5250, GNorm = 0.1297, lr_0 = 5.8350e-04
Loss = 1.8933e-02, PNorm = 129.5649, GNorm = 0.2466, lr_0 = 5.8310e-04
Loss = 1.6046e-02, PNorm = 129.6024, GNorm = 0.1990, lr_0 = 5.8270e-04
Loss = 1.6048e-02, PNorm = 129.6431, GNorm = 0.2061, lr_0 = 5.8230e-04
Loss = 1.8879e-02, PNorm = 129.6812, GNorm = 0.3539, lr_0 = 5.8190e-04
Loss = 1.5600e-02, PNorm = 129.7189, GNorm = 0.1336, lr_0 = 5.8151e-04
Loss = 1.7617e-02, PNorm = 129.7565, GNorm = 0.3704, lr_0 = 5.8111e-04
Loss = 1.9182e-02, PNorm = 129.7976, GNorm = 0.6877, lr_0 = 5.8071e-04
Loss = 2.1745e-02, PNorm = 129.8438, GNorm = 0.4660, lr_0 = 5.8031e-04
Loss = 1.9429e-02, PNorm = 129.8856, GNorm = 0.5750, lr_0 = 5.7991e-04
Loss = 1.7936e-02, PNorm = 129.9280, GNorm = 0.2441, lr_0 = 5.7952e-04
Loss = 1.7141e-02, PNorm = 129.9665, GNorm = 0.2341, lr_0 = 5.7912e-04
Loss = 1.6778e-02, PNorm = 130.0040, GNorm = 0.3098, lr_0 = 5.7872e-04
Loss = 1.9827e-02, PNorm = 130.0426, GNorm = 0.4883, lr_0 = 5.7833e-04
Loss = 1.7985e-02, PNorm = 130.0781, GNorm = 0.2333, lr_0 = 5.7793e-04
Loss = 1.7121e-02, PNorm = 130.1170, GNorm = 0.3077, lr_0 = 5.7753e-04
Loss = 2.0181e-02, PNorm = 130.1573, GNorm = 0.4411, lr_0 = 5.7714e-04
Loss = 1.5490e-02, PNorm = 130.2028, GNorm = 0.4348, lr_0 = 5.7674e-04
Loss = 1.7164e-02, PNorm = 130.2466, GNorm = 0.2714, lr_0 = 5.7635e-04
Loss = 2.1872e-02, PNorm = 130.2841, GNorm = 0.5839, lr_0 = 5.7595e-04
Loss = 2.0532e-02, PNorm = 130.3264, GNorm = 0.4407, lr_0 = 5.7556e-04
Loss = 1.7595e-02, PNorm = 130.3702, GNorm = 0.2550, lr_0 = 5.7516e-04
Loss = 2.2922e-02, PNorm = 130.4122, GNorm = 0.2873, lr_0 = 5.7477e-04
Loss = 2.0309e-02, PNorm = 130.4528, GNorm = 0.2482, lr_0 = 5.7438e-04
Loss = 1.8541e-02, PNorm = 130.4951, GNorm = 0.2657, lr_0 = 5.7398e-04
Loss = 1.7011e-02, PNorm = 130.5379, GNorm = 0.3112, lr_0 = 5.7359e-04
Loss = 1.7679e-02, PNorm = 130.5795, GNorm = 0.3331, lr_0 = 5.7320e-04
Loss = 1.8613e-02, PNorm = 130.6213, GNorm = 0.3704, lr_0 = 5.7280e-04
Loss = 1.5646e-02, PNorm = 130.6591, GNorm = 0.1542, lr_0 = 5.7241e-04
Loss = 1.6921e-02, PNorm = 130.7033, GNorm = 0.2123, lr_0 = 5.7202e-04
Loss = 1.6082e-02, PNorm = 130.7446, GNorm = 0.2541, lr_0 = 5.7163e-04
Loss = 2.0939e-02, PNorm = 130.7877, GNorm = 0.1692, lr_0 = 5.7124e-04
Loss = 2.1220e-02, PNorm = 130.8346, GNorm = 0.4069, lr_0 = 5.7084e-04
Loss = 1.8174e-02, PNorm = 130.8842, GNorm = 0.4289, lr_0 = 5.7045e-04
Loss = 1.7144e-02, PNorm = 130.9274, GNorm = 0.5612, lr_0 = 5.7006e-04
Loss = 1.6488e-02, PNorm = 130.9664, GNorm = 0.3055, lr_0 = 5.6967e-04
Loss = 1.7479e-02, PNorm = 131.0069, GNorm = 0.2356, lr_0 = 5.6928e-04
Loss = 2.1603e-02, PNorm = 131.0489, GNorm = 0.1935, lr_0 = 5.6889e-04
Loss = 1.9119e-02, PNorm = 131.0992, GNorm = 0.3638, lr_0 = 5.6850e-04
Loss = 1.7237e-02, PNorm = 131.1407, GNorm = 0.2522, lr_0 = 5.6811e-04
Loss = 2.2786e-02, PNorm = 131.1860, GNorm = 0.2713, lr_0 = 5.6772e-04
Loss = 2.0054e-02, PNorm = 131.2342, GNorm = 0.1756, lr_0 = 5.6733e-04
Loss = 1.9145e-02, PNorm = 131.2778, GNorm = 0.1228, lr_0 = 5.6695e-04
Loss = 1.8231e-02, PNorm = 131.3234, GNorm = 0.3069, lr_0 = 5.6656e-04
Loss = 2.4675e-02, PNorm = 131.3672, GNorm = 0.3881, lr_0 = 5.6617e-04
Loss = 1.7795e-02, PNorm = 131.4104, GNorm = 0.5214, lr_0 = 5.6578e-04
Loss = 1.8545e-02, PNorm = 131.4542, GNorm = 0.1684, lr_0 = 5.6539e-04
Loss = 2.0428e-02, PNorm = 131.5028, GNorm = 0.3366, lr_0 = 5.6501e-04
Loss = 2.0789e-02, PNorm = 131.5530, GNorm = 0.6893, lr_0 = 5.6462e-04
Loss = 1.8420e-02, PNorm = 131.5980, GNorm = 0.4173, lr_0 = 5.6423e-04
Loss = 1.8196e-02, PNorm = 131.6355, GNorm = 0.2415, lr_0 = 5.6385e-04
Loss = 2.0759e-02, PNorm = 131.6725, GNorm = 0.3116, lr_0 = 5.6346e-04
Loss = 1.9769e-02, PNorm = 131.7150, GNorm = 0.2382, lr_0 = 5.6307e-04
Loss = 1.8272e-02, PNorm = 131.7625, GNorm = 0.3960, lr_0 = 5.6269e-04
Loss = 1.8846e-02, PNorm = 131.8117, GNorm = 0.1990, lr_0 = 5.6230e-04
Validation mae = 0.485951
Epoch 9
Loss = 1.5872e-02, PNorm = 131.8565, GNorm = 0.2064, lr_0 = 5.6192e-04
Loss = 1.7570e-02, PNorm = 131.8910, GNorm = 1.1073, lr_0 = 5.6153e-04
Loss = 1.4818e-02, PNorm = 131.9123, GNorm = 0.3367, lr_0 = 5.6115e-04
Loss = 1.9205e-02, PNorm = 131.9396, GNorm = 0.3895, lr_0 = 5.6076e-04
Loss = 1.4964e-02, PNorm = 131.9702, GNorm = 0.5991, lr_0 = 5.6038e-04
Loss = 1.6796e-02, PNorm = 132.0027, GNorm = 0.1721, lr_0 = 5.6000e-04
Loss = 1.3642e-02, PNorm = 132.0346, GNorm = 0.2624, lr_0 = 5.5961e-04
Loss = 1.6498e-02, PNorm = 132.0607, GNorm = 0.6541, lr_0 = 5.5923e-04
Loss = 1.5133e-02, PNorm = 132.0818, GNorm = 0.4239, lr_0 = 5.5885e-04
Loss = 1.4824e-02, PNorm = 132.1098, GNorm = 0.2119, lr_0 = 5.5846e-04
Loss = 1.8876e-02, PNorm = 132.1410, GNorm = 0.2704, lr_0 = 5.5808e-04
Loss = 1.6838e-02, PNorm = 132.1685, GNorm = 0.2020, lr_0 = 5.5770e-04
Loss = 1.4925e-02, PNorm = 132.1963, GNorm = 0.3862, lr_0 = 5.5732e-04
Loss = 1.4218e-02, PNorm = 132.2273, GNorm = 0.2500, lr_0 = 5.5693e-04
Loss = 1.4333e-02, PNorm = 132.2548, GNorm = 0.3277, lr_0 = 5.5655e-04
Loss = 1.4986e-02, PNorm = 132.2867, GNorm = 0.1954, lr_0 = 5.5617e-04
Loss = 1.6044e-02, PNorm = 132.3107, GNorm = 0.2277, lr_0 = 5.5579e-04
Loss = 1.7942e-02, PNorm = 132.3387, GNorm = 0.2526, lr_0 = 5.5541e-04
Loss = 1.6803e-02, PNorm = 132.3680, GNorm = 0.1239, lr_0 = 5.5503e-04
Loss = 1.4038e-02, PNorm = 132.3947, GNorm = 0.3534, lr_0 = 5.5465e-04
Loss = 1.5450e-02, PNorm = 132.4254, GNorm = 0.6056, lr_0 = 5.5427e-04
Loss = 1.6645e-02, PNorm = 132.4636, GNorm = 0.6499, lr_0 = 5.5389e-04
Loss = 1.4106e-02, PNorm = 132.5011, GNorm = 0.1325, lr_0 = 5.5351e-04
Loss = 1.4513e-02, PNorm = 132.5269, GNorm = 0.1512, lr_0 = 5.5313e-04
Loss = 1.3475e-02, PNorm = 132.5499, GNorm = 0.5364, lr_0 = 5.5275e-04
Loss = 1.5564e-02, PNorm = 132.5768, GNorm = 0.3193, lr_0 = 5.5237e-04
Loss = 1.6514e-02, PNorm = 132.6019, GNorm = 0.2194, lr_0 = 5.5199e-04
Loss = 1.8431e-02, PNorm = 132.6322, GNorm = 0.1679, lr_0 = 5.5162e-04
Loss = 1.4700e-02, PNorm = 132.6587, GNorm = 0.2194, lr_0 = 5.5124e-04
Loss = 1.3530e-02, PNorm = 132.6917, GNorm = 0.4342, lr_0 = 5.5086e-04
Loss = 1.7422e-02, PNorm = 132.7198, GNorm = 0.2794, lr_0 = 5.5048e-04
Loss = 1.4818e-02, PNorm = 132.7521, GNorm = 0.6252, lr_0 = 5.5011e-04
Loss = 1.7462e-02, PNorm = 132.7845, GNorm = 0.5041, lr_0 = 5.4973e-04
Loss = 1.5800e-02, PNorm = 132.8203, GNorm = 0.2336, lr_0 = 5.4935e-04
Loss = 1.3265e-02, PNorm = 132.8557, GNorm = 0.1457, lr_0 = 5.4898e-04
Loss = 1.5859e-02, PNorm = 132.8872, GNorm = 0.5462, lr_0 = 5.4860e-04
Loss = 1.5782e-02, PNorm = 132.9217, GNorm = 0.2494, lr_0 = 5.4822e-04
Loss = 1.6521e-02, PNorm = 132.9525, GNorm = 0.3298, lr_0 = 5.4785e-04
Loss = 1.3626e-02, PNorm = 132.9894, GNorm = 0.1157, lr_0 = 5.4747e-04
Loss = 1.6111e-02, PNorm = 133.0257, GNorm = 0.3076, lr_0 = 5.4710e-04
Loss = 1.2657e-02, PNorm = 133.0598, GNorm = 0.1815, lr_0 = 5.4672e-04
Loss = 1.2037e-02, PNorm = 133.0882, GNorm = 0.6151, lr_0 = 5.4635e-04
Loss = 1.4922e-02, PNorm = 133.1125, GNorm = 0.5191, lr_0 = 5.4597e-04
Loss = 1.1982e-02, PNorm = 133.1380, GNorm = 0.3455, lr_0 = 5.4560e-04
Loss = 1.5909e-02, PNorm = 133.1636, GNorm = 0.3709, lr_0 = 5.4523e-04
Loss = 1.2071e-02, PNorm = 133.1873, GNorm = 0.3003, lr_0 = 5.4485e-04
Loss = 1.3850e-02, PNorm = 133.2201, GNorm = 0.2246, lr_0 = 5.4448e-04
Loss = 1.6492e-02, PNorm = 133.2507, GNorm = 0.2180, lr_0 = 5.4411e-04
Loss = 1.4810e-02, PNorm = 133.2797, GNorm = 0.4630, lr_0 = 5.4373e-04
Loss = 1.6793e-02, PNorm = 133.3141, GNorm = 0.3974, lr_0 = 5.4336e-04
Loss = 1.4786e-02, PNorm = 133.3516, GNorm = 0.4970, lr_0 = 5.4299e-04
Loss = 1.4002e-02, PNorm = 133.3836, GNorm = 0.1440, lr_0 = 5.4262e-04
Loss = 1.4496e-02, PNorm = 133.4151, GNorm = 0.2447, lr_0 = 5.4225e-04
Loss = 1.6991e-02, PNorm = 133.4490, GNorm = 0.4177, lr_0 = 5.4187e-04
Loss = 1.2825e-02, PNorm = 133.4916, GNorm = 0.5057, lr_0 = 5.4150e-04
Loss = 1.5139e-02, PNorm = 133.5303, GNorm = 0.3728, lr_0 = 5.4113e-04
Loss = 1.2801e-02, PNorm = 133.5630, GNorm = 0.1298, lr_0 = 5.4076e-04
Loss = 1.3698e-02, PNorm = 133.5932, GNorm = 0.2887, lr_0 = 5.4039e-04
Loss = 1.6627e-02, PNorm = 133.6242, GNorm = 0.4102, lr_0 = 5.4002e-04
Loss = 1.4982e-02, PNorm = 133.6530, GNorm = 0.1493, lr_0 = 5.3965e-04
Loss = 1.3665e-02, PNorm = 133.6825, GNorm = 0.1563, lr_0 = 5.3928e-04
Loss = 1.5261e-02, PNorm = 133.7163, GNorm = 0.5141, lr_0 = 5.3891e-04
Loss = 1.4164e-02, PNorm = 133.7544, GNorm = 0.2632, lr_0 = 5.3854e-04
Loss = 1.5183e-02, PNorm = 133.7920, GNorm = 0.1088, lr_0 = 5.3817e-04
Loss = 1.3440e-02, PNorm = 133.8283, GNorm = 0.4027, lr_0 = 5.3781e-04
Loss = 1.2877e-02, PNorm = 133.8561, GNorm = 0.2168, lr_0 = 5.3744e-04
Loss = 1.5103e-02, PNorm = 133.8822, GNorm = 0.7571, lr_0 = 5.3707e-04
Loss = 1.4420e-02, PNorm = 133.9132, GNorm = 0.2456, lr_0 = 5.3670e-04
Loss = 1.3296e-02, PNorm = 133.9464, GNorm = 0.1645, lr_0 = 5.3633e-04
Loss = 1.2890e-02, PNorm = 133.9808, GNorm = 0.3026, lr_0 = 5.3597e-04
Loss = 1.4392e-02, PNorm = 134.0145, GNorm = 0.2647, lr_0 = 5.3560e-04
Loss = 1.4741e-02, PNorm = 134.0410, GNorm = 0.4762, lr_0 = 5.3523e-04
Loss = 1.4345e-02, PNorm = 134.0741, GNorm = 0.2148, lr_0 = 5.3486e-04
Loss = 1.6044e-02, PNorm = 134.1157, GNorm = 0.1471, lr_0 = 5.3450e-04
Loss = 1.6691e-02, PNorm = 134.1528, GNorm = 0.1607, lr_0 = 5.3413e-04
Loss = 1.5467e-02, PNorm = 134.1823, GNorm = 0.6552, lr_0 = 5.3377e-04
Loss = 1.3659e-02, PNorm = 134.2176, GNorm = 0.3408, lr_0 = 5.3340e-04
Loss = 1.2843e-02, PNorm = 134.2518, GNorm = 0.1696, lr_0 = 5.3304e-04
Loss = 1.2697e-02, PNorm = 134.2837, GNorm = 0.4041, lr_0 = 5.3267e-04
Loss = 1.3645e-02, PNorm = 134.3117, GNorm = 0.5653, lr_0 = 5.3231e-04
Loss = 1.2829e-02, PNorm = 134.3456, GNorm = 0.1977, lr_0 = 5.3194e-04
Loss = 1.4108e-02, PNorm = 134.3729, GNorm = 0.1682, lr_0 = 5.3158e-04
Loss = 1.6270e-02, PNorm = 134.4048, GNorm = 0.3035, lr_0 = 5.3121e-04
Loss = 1.4317e-02, PNorm = 134.4344, GNorm = 0.3722, lr_0 = 5.3085e-04
Loss = 1.7845e-02, PNorm = 134.4693, GNorm = 0.4042, lr_0 = 5.3048e-04
Loss = 1.5222e-02, PNorm = 134.5056, GNorm = 0.4898, lr_0 = 5.3012e-04
Loss = 1.3889e-02, PNorm = 134.5342, GNorm = 0.1583, lr_0 = 5.2976e-04
Loss = 1.6734e-02, PNorm = 134.5717, GNorm = 0.8321, lr_0 = 5.2939e-04
Loss = 1.4024e-02, PNorm = 134.6103, GNorm = 0.1207, lr_0 = 5.2903e-04
Loss = 1.5917e-02, PNorm = 134.6487, GNorm = 0.9377, lr_0 = 5.2867e-04
Loss = 1.9011e-02, PNorm = 134.6851, GNorm = 0.7420, lr_0 = 5.2831e-04
Loss = 1.5050e-02, PNorm = 134.7210, GNorm = 0.1627, lr_0 = 5.2795e-04
Loss = 1.4413e-02, PNorm = 134.7516, GNorm = 0.3289, lr_0 = 5.2758e-04
Loss = 1.6664e-02, PNorm = 134.7886, GNorm = 0.1559, lr_0 = 5.2722e-04
Loss = 1.7322e-02, PNorm = 134.8237, GNorm = 0.2790, lr_0 = 5.2686e-04
Loss = 1.5599e-02, PNorm = 134.8547, GNorm = 0.2597, lr_0 = 5.2650e-04
Loss = 1.3822e-02, PNorm = 134.8892, GNorm = 0.3205, lr_0 = 5.2614e-04
Loss = 1.7090e-02, PNorm = 134.9258, GNorm = 0.4977, lr_0 = 5.2578e-04
Loss = 2.1186e-02, PNorm = 134.9697, GNorm = 0.2353, lr_0 = 5.2542e-04
Loss = 1.4268e-02, PNorm = 135.0063, GNorm = 0.3857, lr_0 = 5.2506e-04
Loss = 1.6435e-02, PNorm = 135.0470, GNorm = 0.2166, lr_0 = 5.2470e-04
Loss = 1.3838e-02, PNorm = 135.0863, GNorm = 0.1613, lr_0 = 5.2434e-04
Loss = 1.8118e-02, PNorm = 135.1186, GNorm = 0.2363, lr_0 = 5.2398e-04
Loss = 1.4558e-02, PNorm = 135.1533, GNorm = 0.1890, lr_0 = 5.2362e-04
Loss = 1.5035e-02, PNorm = 135.1780, GNorm = 0.1726, lr_0 = 5.2326e-04
Loss = 1.5359e-02, PNorm = 135.2037, GNorm = 0.5134, lr_0 = 5.2290e-04
Loss = 1.4548e-02, PNorm = 135.2435, GNorm = 0.3116, lr_0 = 5.2255e-04
Loss = 1.6026e-02, PNorm = 135.2848, GNorm = 0.5940, lr_0 = 5.2219e-04
Loss = 1.4575e-02, PNorm = 135.3231, GNorm = 0.1536, lr_0 = 5.2183e-04
Loss = 1.6882e-02, PNorm = 135.3648, GNorm = 0.2712, lr_0 = 5.2147e-04
Loss = 1.4826e-02, PNorm = 135.4047, GNorm = 0.3221, lr_0 = 5.2112e-04
Loss = 1.4782e-02, PNorm = 135.4369, GNorm = 0.1415, lr_0 = 5.2076e-04
Loss = 1.5981e-02, PNorm = 135.4736, GNorm = 0.1260, lr_0 = 5.2040e-04
Loss = 1.4123e-02, PNorm = 135.5030, GNorm = 0.5795, lr_0 = 5.2005e-04
Loss = 1.6405e-02, PNorm = 135.5378, GNorm = 0.3426, lr_0 = 5.1969e-04
Loss = 1.6888e-02, PNorm = 135.5701, GNorm = 0.7744, lr_0 = 5.1933e-04
Loss = 1.6210e-02, PNorm = 135.6037, GNorm = 0.2012, lr_0 = 5.1898e-04
Loss = 1.4011e-02, PNorm = 135.6436, GNorm = 0.3314, lr_0 = 5.1862e-04
Loss = 1.5526e-02, PNorm = 135.6827, GNorm = 0.2693, lr_0 = 5.1827e-04
Loss = 1.2551e-02, PNorm = 135.7203, GNorm = 0.4262, lr_0 = 5.1791e-04
Validation mae = 0.483093
Epoch 10
Loss = 1.1509e-02, PNorm = 135.7477, GNorm = 0.2039, lr_0 = 5.1756e-04
Loss = 1.2914e-02, PNorm = 135.7693, GNorm = 0.3127, lr_0 = 5.1720e-04
Loss = 1.3052e-02, PNorm = 135.7882, GNorm = 0.3938, lr_0 = 5.1685e-04
Loss = 1.3668e-02, PNorm = 135.8134, GNorm = 0.2108, lr_0 = 5.1649e-04
Loss = 1.1164e-02, PNorm = 135.8386, GNorm = 0.4601, lr_0 = 5.1614e-04
Loss = 1.2147e-02, PNorm = 135.8621, GNorm = 0.3054, lr_0 = 5.1579e-04
Loss = 1.0674e-02, PNorm = 135.8873, GNorm = 0.3984, lr_0 = 5.1543e-04
Loss = 1.3860e-02, PNorm = 135.9075, GNorm = 0.1555, lr_0 = 5.1508e-04
Loss = 1.3725e-02, PNorm = 135.9265, GNorm = 0.2017, lr_0 = 5.1473e-04
Loss = 1.3327e-02, PNorm = 135.9477, GNorm = 0.2595, lr_0 = 5.1437e-04
Loss = 1.5887e-02, PNorm = 135.9734, GNorm = 0.1808, lr_0 = 5.1402e-04
Loss = 1.2360e-02, PNorm = 136.0050, GNorm = 0.3306, lr_0 = 5.1367e-04
Loss = 1.4636e-02, PNorm = 136.0350, GNorm = 0.7824, lr_0 = 5.1332e-04
Loss = 1.1390e-02, PNorm = 136.0584, GNorm = 0.3616, lr_0 = 5.1297e-04
Loss = 1.3552e-02, PNorm = 136.0864, GNorm = 0.2800, lr_0 = 5.1262e-04
Loss = 1.2879e-02, PNorm = 136.1090, GNorm = 0.3515, lr_0 = 5.1226e-04
Loss = 1.6777e-02, PNorm = 136.1276, GNorm = 0.3652, lr_0 = 5.1191e-04
Loss = 1.1958e-02, PNorm = 136.1507, GNorm = 0.3129, lr_0 = 5.1156e-04
Loss = 1.2359e-02, PNorm = 136.1785, GNorm = 0.5570, lr_0 = 5.1121e-04
Loss = 1.2915e-02, PNorm = 136.2071, GNorm = 0.3850, lr_0 = 5.1086e-04
Loss = 1.2099e-02, PNorm = 136.2294, GNorm = 0.3591, lr_0 = 5.1051e-04
Loss = 1.1968e-02, PNorm = 136.2529, GNorm = 0.4654, lr_0 = 5.1016e-04
Loss = 1.3282e-02, PNorm = 136.2705, GNorm = 0.2327, lr_0 = 5.0981e-04
Loss = 1.3962e-02, PNorm = 136.2968, GNorm = 0.4057, lr_0 = 5.0946e-04
Loss = 1.2545e-02, PNorm = 136.3283, GNorm = 0.3340, lr_0 = 5.0911e-04
Loss = 1.2739e-02, PNorm = 136.3565, GNorm = 0.2595, lr_0 = 5.0877e-04
Loss = 1.1484e-02, PNorm = 136.3813, GNorm = 0.4478, lr_0 = 5.0842e-04
Loss = 1.2097e-02, PNorm = 136.4011, GNorm = 0.5590, lr_0 = 5.0807e-04
Loss = 1.0340e-02, PNorm = 136.4298, GNorm = 0.1367, lr_0 = 5.0772e-04
Loss = 1.0897e-02, PNorm = 136.4553, GNorm = 0.1770, lr_0 = 5.0737e-04
Loss = 1.1005e-02, PNorm = 136.4792, GNorm = 0.2928, lr_0 = 5.0703e-04
Loss = 1.3680e-02, PNorm = 136.5052, GNorm = 0.2690, lr_0 = 5.0668e-04
Loss = 1.0140e-02, PNorm = 136.5315, GNorm = 0.1506, lr_0 = 5.0633e-04
Loss = 1.2368e-02, PNorm = 136.5588, GNorm = 0.2153, lr_0 = 5.0598e-04
Loss = 1.5060e-02, PNorm = 136.5848, GNorm = 0.3047, lr_0 = 5.0564e-04
Loss = 1.2662e-02, PNorm = 136.6074, GNorm = 0.8238, lr_0 = 5.0529e-04
Loss = 1.1531e-02, PNorm = 136.6370, GNorm = 0.6401, lr_0 = 5.0494e-04
Loss = 1.1024e-02, PNorm = 136.6651, GNorm = 0.4039, lr_0 = 5.0460e-04
Loss = 1.0702e-02, PNorm = 136.6939, GNorm = 0.1439, lr_0 = 5.0425e-04
Loss = 1.2229e-02, PNorm = 136.7176, GNorm = 0.3246, lr_0 = 5.0391e-04
Loss = 1.1749e-02, PNorm = 136.7420, GNorm = 0.2636, lr_0 = 5.0356e-04
Loss = 1.0655e-02, PNorm = 136.7696, GNorm = 0.5432, lr_0 = 5.0322e-04
Loss = 1.2656e-02, PNorm = 136.7951, GNorm = 0.1684, lr_0 = 5.0287e-04
Loss = 1.1110e-02, PNorm = 136.8192, GNorm = 0.3450, lr_0 = 5.0253e-04
Loss = 1.2176e-02, PNorm = 136.8446, GNorm = 0.4961, lr_0 = 5.0218e-04
Loss = 1.2858e-02, PNorm = 136.8714, GNorm = 0.1501, lr_0 = 5.0184e-04
Loss = 1.3319e-02, PNorm = 136.8988, GNorm = 0.4995, lr_0 = 5.0150e-04
Loss = 1.2540e-02, PNorm = 136.9248, GNorm = 0.1512, lr_0 = 5.0115e-04
Loss = 1.3948e-02, PNorm = 136.9504, GNorm = 0.1255, lr_0 = 5.0081e-04
Loss = 1.2628e-02, PNorm = 136.9807, GNorm = 0.4730, lr_0 = 5.0047e-04
Loss = 1.0952e-02, PNorm = 137.0108, GNorm = 0.1619, lr_0 = 5.0012e-04
Loss = 1.1337e-02, PNorm = 137.0400, GNorm = 0.1320, lr_0 = 4.9978e-04
Loss = 1.3620e-02, PNorm = 137.0711, GNorm = 0.2592, lr_0 = 4.9944e-04
Loss = 1.3623e-02, PNorm = 137.1063, GNorm = 0.4031, lr_0 = 4.9910e-04
Loss = 1.1089e-02, PNorm = 137.1365, GNorm = 0.5917, lr_0 = 4.9875e-04
Loss = 1.2348e-02, PNorm = 137.1649, GNorm = 0.5288, lr_0 = 4.9841e-04
Loss = 1.2229e-02, PNorm = 137.1971, GNorm = 0.1242, lr_0 = 4.9807e-04
Loss = 1.2010e-02, PNorm = 137.2258, GNorm = 0.3037, lr_0 = 4.9773e-04
Loss = 1.3133e-02, PNorm = 137.2506, GNorm = 0.2399, lr_0 = 4.9739e-04
Loss = 1.3082e-02, PNorm = 137.2768, GNorm = 0.1615, lr_0 = 4.9705e-04
Loss = 1.0852e-02, PNorm = 137.3013, GNorm = 0.3182, lr_0 = 4.9671e-04
Loss = 1.2075e-02, PNorm = 137.3275, GNorm = 0.1228, lr_0 = 4.9637e-04
Loss = 1.1641e-02, PNorm = 137.3493, GNorm = 0.8543, lr_0 = 4.9603e-04
Loss = 1.0809e-02, PNorm = 137.3748, GNorm = 0.3676, lr_0 = 4.9569e-04
Loss = 2.3022e-02, PNorm = 137.4011, GNorm = 0.2128, lr_0 = 4.9535e-04
Loss = 1.1456e-02, PNorm = 137.4286, GNorm = 0.1661, lr_0 = 4.9501e-04
Loss = 1.0084e-02, PNorm = 137.4576, GNorm = 0.4546, lr_0 = 4.9467e-04
Loss = 1.2737e-02, PNorm = 137.4812, GNorm = 0.1938, lr_0 = 4.9433e-04
Loss = 1.1446e-02, PNorm = 137.5062, GNorm = 0.1363, lr_0 = 4.9399e-04
Loss = 1.1184e-02, PNorm = 137.5360, GNorm = 0.3096, lr_0 = 4.9365e-04
Loss = 1.2406e-02, PNorm = 137.5655, GNorm = 0.2488, lr_0 = 4.9332e-04
Loss = 1.1241e-02, PNorm = 137.5934, GNorm = 0.2818, lr_0 = 4.9298e-04
Loss = 1.3448e-02, PNorm = 137.6186, GNorm = 0.4378, lr_0 = 4.9264e-04
Loss = 1.3660e-02, PNorm = 137.6422, GNorm = 0.2548, lr_0 = 4.9230e-04
Loss = 1.1332e-02, PNorm = 137.6737, GNorm = 0.4312, lr_0 = 4.9197e-04
Loss = 1.2947e-02, PNorm = 137.7008, GNorm = 0.3003, lr_0 = 4.9163e-04
Loss = 1.4285e-02, PNorm = 137.7321, GNorm = 0.3793, lr_0 = 4.9129e-04
Loss = 1.2182e-02, PNorm = 137.7625, GNorm = 0.1446, lr_0 = 4.9095e-04
Loss = 1.1107e-02, PNorm = 137.7887, GNorm = 0.5305, lr_0 = 4.9062e-04
Loss = 1.1708e-02, PNorm = 137.8192, GNorm = 0.2976, lr_0 = 4.9028e-04
Loss = 9.8237e-03, PNorm = 137.8435, GNorm = 0.1679, lr_0 = 4.8995e-04
Loss = 1.2478e-02, PNorm = 137.8714, GNorm = 0.2068, lr_0 = 4.8961e-04
Loss = 1.2031e-02, PNorm = 137.9025, GNorm = 0.2320, lr_0 = 4.8928e-04
Loss = 1.4522e-02, PNorm = 137.9402, GNorm = 0.5277, lr_0 = 4.8894e-04
Loss = 1.5374e-02, PNorm = 137.9725, GNorm = 0.2900, lr_0 = 4.8861e-04
Loss = 1.3766e-02, PNorm = 137.9998, GNorm = 0.4444, lr_0 = 4.8827e-04
Loss = 1.1967e-02, PNorm = 138.0248, GNorm = 0.1113, lr_0 = 4.8794e-04
Loss = 1.2478e-02, PNorm = 138.0580, GNorm = 0.6432, lr_0 = 4.8760e-04
Loss = 1.0886e-02, PNorm = 138.0892, GNorm = 0.1818, lr_0 = 4.8727e-04
Loss = 1.4236e-02, PNorm = 138.1201, GNorm = 0.5307, lr_0 = 4.8693e-04
Loss = 1.4265e-02, PNorm = 138.1547, GNorm = 0.1449, lr_0 = 4.8660e-04
Loss = 1.3109e-02, PNorm = 138.1885, GNorm = 0.3078, lr_0 = 4.8627e-04
Loss = 1.1792e-02, PNorm = 138.2214, GNorm = 0.2902, lr_0 = 4.8593e-04
Loss = 1.3636e-02, PNorm = 138.2490, GNorm = 0.2363, lr_0 = 4.8560e-04
Loss = 1.1966e-02, PNorm = 138.2849, GNorm = 0.3870, lr_0 = 4.8527e-04
Loss = 1.0595e-02, PNorm = 138.3144, GNorm = 0.2060, lr_0 = 4.8494e-04
Loss = 1.1322e-02, PNorm = 138.3424, GNorm = 0.1486, lr_0 = 4.8460e-04
Loss = 1.3151e-02, PNorm = 138.3657, GNorm = 0.4262, lr_0 = 4.8427e-04
Loss = 1.2026e-02, PNorm = 138.3934, GNorm = 0.3199, lr_0 = 4.8394e-04
Loss = 1.2085e-02, PNorm = 138.4246, GNorm = 0.4151, lr_0 = 4.8361e-04
Loss = 1.2261e-02, PNorm = 138.4536, GNorm = 0.3823, lr_0 = 4.8328e-04
Loss = 1.3065e-02, PNorm = 138.4799, GNorm = 0.2098, lr_0 = 4.8295e-04
Loss = 1.1749e-02, PNorm = 138.5073, GNorm = 0.4001, lr_0 = 4.8262e-04
Loss = 1.0902e-02, PNorm = 138.5313, GNorm = 0.2338, lr_0 = 4.8228e-04
Loss = 1.2996e-02, PNorm = 138.5564, GNorm = 0.1725, lr_0 = 4.8195e-04
Loss = 1.3139e-02, PNorm = 138.5791, GNorm = 0.6204, lr_0 = 4.8162e-04
Loss = 1.1244e-02, PNorm = 138.6043, GNorm = 0.1743, lr_0 = 4.8129e-04
Loss = 1.2939e-02, PNorm = 138.6299, GNorm = 0.5491, lr_0 = 4.8096e-04
Loss = 1.1794e-02, PNorm = 138.6651, GNorm = 0.2253, lr_0 = 4.8064e-04
Loss = 1.5651e-02, PNorm = 138.6983, GNorm = 0.3151, lr_0 = 4.8031e-04
Loss = 1.2281e-02, PNorm = 138.7332, GNorm = 0.3167, lr_0 = 4.7998e-04
Loss = 1.2816e-02, PNorm = 138.7636, GNorm = 0.3239, lr_0 = 4.7965e-04
Loss = 1.3030e-02, PNorm = 138.7948, GNorm = 0.4891, lr_0 = 4.7932e-04
Loss = 1.1911e-02, PNorm = 138.8177, GNorm = 0.3124, lr_0 = 4.7899e-04
Loss = 1.2592e-02, PNorm = 138.8517, GNorm = 0.3201, lr_0 = 4.7866e-04
Loss = 1.2214e-02, PNorm = 138.8829, GNorm = 0.2490, lr_0 = 4.7833e-04
Loss = 1.1502e-02, PNorm = 138.9182, GNorm = 0.1237, lr_0 = 4.7801e-04
Loss = 1.5530e-02, PNorm = 138.9477, GNorm = 0.1340, lr_0 = 4.7768e-04
Loss = 1.2939e-02, PNorm = 138.9745, GNorm = 0.2816, lr_0 = 4.7735e-04
Loss = 1.4440e-02, PNorm = 138.9968, GNorm = 0.3473, lr_0 = 4.7703e-04
Validation mae = 0.483080
Epoch 11
Loss = 1.0159e-02, PNorm = 139.0230, GNorm = 0.2390, lr_0 = 4.7670e-04
Loss = 1.0908e-02, PNorm = 139.0471, GNorm = 0.1795, lr_0 = 4.7637e-04
Loss = 1.4676e-02, PNorm = 139.0704, GNorm = 0.1163, lr_0 = 4.7605e-04
Loss = 1.1387e-02, PNorm = 139.0887, GNorm = 0.1753, lr_0 = 4.7572e-04
Loss = 8.0872e-03, PNorm = 139.1079, GNorm = 0.1949, lr_0 = 4.7539e-04
Loss = 1.0549e-02, PNorm = 139.1285, GNorm = 0.1686, lr_0 = 4.7507e-04
Loss = 1.0024e-02, PNorm = 139.1483, GNorm = 0.2339, lr_0 = 4.7474e-04
Loss = 1.2093e-02, PNorm = 139.1657, GNorm = 0.7041, lr_0 = 4.7442e-04
Loss = 1.0871e-02, PNorm = 139.1885, GNorm = 0.2867, lr_0 = 4.7409e-04
Loss = 1.0558e-02, PNorm = 139.2100, GNorm = 0.4289, lr_0 = 4.7377e-04
Loss = 1.0671e-02, PNorm = 139.2247, GNorm = 0.1482, lr_0 = 4.7344e-04
Loss = 1.0486e-02, PNorm = 139.2400, GNorm = 0.2066, lr_0 = 4.7312e-04
Loss = 1.0468e-02, PNorm = 139.2587, GNorm = 0.2978, lr_0 = 4.7279e-04
Loss = 1.1358e-02, PNorm = 139.2783, GNorm = 0.1827, lr_0 = 4.7247e-04
Loss = 1.1856e-02, PNorm = 139.3004, GNorm = 0.2985, lr_0 = 4.7215e-04
Loss = 9.9567e-03, PNorm = 139.3220, GNorm = 0.4356, lr_0 = 4.7182e-04
Loss = 1.1415e-02, PNorm = 139.3405, GNorm = 0.4749, lr_0 = 4.7150e-04
Loss = 1.0955e-02, PNorm = 139.3553, GNorm = 0.2908, lr_0 = 4.7118e-04
Loss = 9.8315e-03, PNorm = 139.3768, GNorm = 0.1572, lr_0 = 4.7085e-04
Loss = 9.0885e-03, PNorm = 139.4013, GNorm = 0.1356, lr_0 = 4.7053e-04
Loss = 9.9760e-03, PNorm = 139.4280, GNorm = 0.3191, lr_0 = 4.7021e-04
Loss = 9.9574e-03, PNorm = 139.4499, GNorm = 0.1072, lr_0 = 4.6989e-04
Loss = 1.1635e-02, PNorm = 139.4714, GNorm = 0.4580, lr_0 = 4.6957e-04
Loss = 8.8712e-03, PNorm = 139.4911, GNorm = 0.3054, lr_0 = 4.6924e-04
Loss = 1.0974e-02, PNorm = 139.5083, GNorm = 0.3289, lr_0 = 4.6892e-04
Loss = 1.0208e-02, PNorm = 139.5316, GNorm = 0.5291, lr_0 = 4.6860e-04
Loss = 1.1368e-02, PNorm = 139.5545, GNorm = 0.3565, lr_0 = 4.6828e-04
Loss = 1.0684e-02, PNorm = 139.5739, GNorm = 0.2762, lr_0 = 4.6796e-04
Loss = 8.6378e-03, PNorm = 139.5919, GNorm = 0.3498, lr_0 = 4.6764e-04
Loss = 8.9306e-03, PNorm = 139.6143, GNorm = 0.1857, lr_0 = 4.6732e-04
Loss = 1.0393e-02, PNorm = 139.6354, GNorm = 0.3491, lr_0 = 4.6700e-04
Loss = 1.0039e-02, PNorm = 139.6510, GNorm = 0.1394, lr_0 = 4.6668e-04
Loss = 9.1229e-03, PNorm = 139.6661, GNorm = 0.3364, lr_0 = 4.6636e-04
Loss = 1.0878e-02, PNorm = 139.6877, GNorm = 0.4413, lr_0 = 4.6604e-04
Loss = 9.4075e-03, PNorm = 139.7035, GNorm = 0.2724, lr_0 = 4.6572e-04
Loss = 1.1695e-02, PNorm = 139.7202, GNorm = 0.3139, lr_0 = 4.6540e-04
Loss = 1.0600e-02, PNorm = 139.7430, GNorm = 0.2713, lr_0 = 4.6508e-04
Loss = 1.0906e-02, PNorm = 139.7750, GNorm = 0.2625, lr_0 = 4.6476e-04
Loss = 1.1683e-02, PNorm = 139.8069, GNorm = 0.3760, lr_0 = 4.6445e-04
Loss = 9.3307e-03, PNorm = 139.8363, GNorm = 0.1822, lr_0 = 4.6413e-04
Loss = 9.3101e-03, PNorm = 139.8612, GNorm = 0.1337, lr_0 = 4.6381e-04
Loss = 8.9110e-03, PNorm = 139.8819, GNorm = 0.0997, lr_0 = 4.6349e-04
Loss = 9.6862e-03, PNorm = 139.9031, GNorm = 0.2941, lr_0 = 4.6317e-04
Loss = 8.4225e-03, PNorm = 139.9226, GNorm = 0.1339, lr_0 = 4.6286e-04
Loss = 9.4708e-03, PNorm = 139.9458, GNorm = 0.2459, lr_0 = 4.6254e-04
Loss = 1.1199e-02, PNorm = 139.9682, GNorm = 0.2070, lr_0 = 4.6222e-04
Loss = 9.7430e-03, PNorm = 139.9912, GNorm = 0.2074, lr_0 = 4.6191e-04
Loss = 1.1175e-02, PNorm = 140.0169, GNorm = 0.1597, lr_0 = 4.6159e-04
Loss = 9.1204e-03, PNorm = 140.0426, GNorm = 0.2603, lr_0 = 4.6127e-04
Loss = 9.9853e-03, PNorm = 140.0665, GNorm = 0.3226, lr_0 = 4.6096e-04
Loss = 9.3574e-03, PNorm = 140.0880, GNorm = 0.2626, lr_0 = 4.6064e-04
Loss = 1.0675e-02, PNorm = 140.1082, GNorm = 0.1781, lr_0 = 4.6033e-04
Loss = 9.2887e-03, PNorm = 140.1276, GNorm = 0.1943, lr_0 = 4.6001e-04
Loss = 9.4026e-03, PNorm = 140.1521, GNorm = 0.3804, lr_0 = 4.5970e-04
Loss = 1.1664e-02, PNorm = 140.1751, GNorm = 0.4880, lr_0 = 4.5938e-04
Loss = 1.0565e-02, PNorm = 140.2017, GNorm = 0.2278, lr_0 = 4.5907e-04
Loss = 9.9314e-03, PNorm = 140.2219, GNorm = 0.2843, lr_0 = 4.5875e-04
Loss = 9.2679e-03, PNorm = 140.2459, GNorm = 0.2045, lr_0 = 4.5844e-04
Loss = 9.3167e-03, PNorm = 140.2718, GNorm = 0.1399, lr_0 = 4.5812e-04
Loss = 9.9107e-03, PNorm = 140.2975, GNorm = 0.1125, lr_0 = 4.5781e-04
Loss = 8.7830e-03, PNorm = 140.3215, GNorm = 0.4442, lr_0 = 4.5750e-04
Loss = 1.0205e-02, PNorm = 140.3438, GNorm = 0.1024, lr_0 = 4.5718e-04
Loss = 8.9446e-03, PNorm = 140.3689, GNorm = 0.1556, lr_0 = 4.5687e-04
Loss = 1.0872e-02, PNorm = 140.3862, GNorm = 0.5830, lr_0 = 4.5656e-04
Loss = 9.7126e-03, PNorm = 140.4093, GNorm = 0.4545, lr_0 = 4.5624e-04
Loss = 8.3508e-03, PNorm = 140.4312, GNorm = 0.5171, lr_0 = 4.5593e-04
Loss = 1.2450e-02, PNorm = 140.4551, GNorm = 0.1897, lr_0 = 4.5562e-04
Loss = 1.1700e-02, PNorm = 140.4755, GNorm = 0.1535, lr_0 = 4.5531e-04
Loss = 1.1759e-02, PNorm = 140.5009, GNorm = 0.4555, lr_0 = 4.5499e-04
Loss = 1.0659e-02, PNorm = 140.5285, GNorm = 0.1331, lr_0 = 4.5468e-04
Loss = 9.9001e-03, PNorm = 140.5581, GNorm = 0.4088, lr_0 = 4.5437e-04
Loss = 1.0210e-02, PNorm = 140.5889, GNorm = 0.2845, lr_0 = 4.5406e-04
Loss = 1.0197e-02, PNorm = 140.6140, GNorm = 0.1972, lr_0 = 4.5375e-04
Loss = 9.4850e-03, PNorm = 140.6351, GNorm = 0.5318, lr_0 = 4.5344e-04
Loss = 1.1846e-02, PNorm = 140.6606, GNorm = 0.1542, lr_0 = 4.5313e-04
Loss = 9.7265e-03, PNorm = 140.6909, GNorm = 0.3894, lr_0 = 4.5282e-04
Loss = 8.2822e-03, PNorm = 140.7210, GNorm = 0.1027, lr_0 = 4.5251e-04
Loss = 9.9641e-03, PNorm = 140.7471, GNorm = 0.1158, lr_0 = 4.5220e-04
Loss = 1.6827e-02, PNorm = 140.7675, GNorm = 0.4874, lr_0 = 4.5189e-04
Loss = 1.1008e-02, PNorm = 140.7965, GNorm = 0.2531, lr_0 = 4.5158e-04
Loss = 8.5492e-03, PNorm = 140.8229, GNorm = 0.4242, lr_0 = 4.5127e-04
Loss = 9.2671e-03, PNorm = 140.8464, GNorm = 0.2111, lr_0 = 4.5096e-04
Loss = 1.0127e-02, PNorm = 140.8724, GNorm = 0.1808, lr_0 = 4.5065e-04
Loss = 9.5990e-03, PNorm = 140.8969, GNorm = 0.2316, lr_0 = 4.5034e-04
Loss = 1.0540e-02, PNorm = 140.9193, GNorm = 0.2273, lr_0 = 4.5003e-04
Loss = 1.3598e-02, PNorm = 140.9388, GNorm = 0.2034, lr_0 = 4.4972e-04
Loss = 1.1464e-02, PNorm = 140.9637, GNorm = 0.3812, lr_0 = 4.4942e-04
Loss = 9.1059e-03, PNorm = 140.9926, GNorm = 0.1260, lr_0 = 4.4911e-04
Loss = 9.1121e-03, PNorm = 141.0206, GNorm = 0.1771, lr_0 = 4.4880e-04
Loss = 8.5252e-03, PNorm = 141.0412, GNorm = 0.3622, lr_0 = 4.4849e-04
Loss = 1.0217e-02, PNorm = 141.0622, GNorm = 0.2625, lr_0 = 4.4819e-04
Loss = 8.6826e-03, PNorm = 141.0839, GNorm = 0.2624, lr_0 = 4.4788e-04
Loss = 1.0014e-02, PNorm = 141.1081, GNorm = 0.1275, lr_0 = 4.4757e-04
Loss = 1.2048e-02, PNorm = 141.1244, GNorm = 0.2622, lr_0 = 4.4727e-04
Loss = 1.5726e-02, PNorm = 141.1495, GNorm = 0.2724, lr_0 = 4.4696e-04
Loss = 9.3926e-03, PNorm = 141.1691, GNorm = 0.1397, lr_0 = 4.4665e-04
Loss = 1.0045e-02, PNorm = 141.1980, GNorm = 0.1369, lr_0 = 4.4635e-04
Loss = 7.7964e-03, PNorm = 141.2281, GNorm = 0.2829, lr_0 = 4.4604e-04
Loss = 8.4118e-03, PNorm = 141.2540, GNorm = 0.2468, lr_0 = 4.4574e-04
Loss = 8.6514e-03, PNorm = 141.2771, GNorm = 0.1904, lr_0 = 4.4543e-04
Loss = 1.0195e-02, PNorm = 141.2981, GNorm = 0.2889, lr_0 = 4.4513e-04
Loss = 9.8562e-03, PNorm = 141.3222, GNorm = 0.2775, lr_0 = 4.4482e-04
Loss = 8.8874e-03, PNorm = 141.3455, GNorm = 0.5523, lr_0 = 4.4452e-04
Loss = 1.1484e-02, PNorm = 141.3691, GNorm = 0.1735, lr_0 = 4.4421e-04
Loss = 1.0120e-02, PNorm = 141.3904, GNorm = 0.3918, lr_0 = 4.4391e-04
Loss = 1.1392e-02, PNorm = 141.4124, GNorm = 0.2716, lr_0 = 4.4360e-04
Loss = 1.0905e-02, PNorm = 141.4317, GNorm = 0.1357, lr_0 = 4.4330e-04
Loss = 8.2783e-03, PNorm = 141.4563, GNorm = 0.3425, lr_0 = 4.4299e-04
Loss = 9.3091e-03, PNorm = 141.4818, GNorm = 0.2365, lr_0 = 4.4269e-04
Loss = 1.0141e-02, PNorm = 141.5006, GNorm = 0.1643, lr_0 = 4.4239e-04
Loss = 1.0846e-02, PNorm = 141.5198, GNorm = 0.1500, lr_0 = 4.4209e-04
Loss = 1.2537e-02, PNorm = 141.5430, GNorm = 0.4005, lr_0 = 4.4178e-04
Loss = 8.9862e-03, PNorm = 141.5741, GNorm = 0.2691, lr_0 = 4.4148e-04
Loss = 9.2930e-03, PNorm = 141.6000, GNorm = 0.3730, lr_0 = 4.4118e-04
Loss = 1.0256e-02, PNorm = 141.6242, GNorm = 0.4494, lr_0 = 4.4088e-04
Loss = 1.1976e-02, PNorm = 141.6479, GNorm = 0.1644, lr_0 = 4.4057e-04
Loss = 9.9201e-03, PNorm = 141.6677, GNorm = 0.1548, lr_0 = 4.4027e-04
Loss = 1.0065e-02, PNorm = 141.6903, GNorm = 0.5216, lr_0 = 4.3997e-04
Loss = 1.3383e-02, PNorm = 141.7188, GNorm = 0.2385, lr_0 = 4.3967e-04
Loss = 1.1977e-02, PNorm = 141.7448, GNorm = 0.3643, lr_0 = 4.3937e-04
Validation mae = 0.478930
Epoch 12
Loss = 8.3601e-03, PNorm = 141.7632, GNorm = 0.1784, lr_0 = 4.3907e-04
Loss = 9.0233e-03, PNorm = 141.7801, GNorm = 0.2541, lr_0 = 4.3877e-04
Loss = 9.8063e-03, PNorm = 141.7960, GNorm = 0.4135, lr_0 = 4.3846e-04
Loss = 8.2405e-03, PNorm = 141.8150, GNorm = 0.2377, lr_0 = 4.3816e-04
Loss = 8.9135e-03, PNorm = 141.8312, GNorm = 0.4284, lr_0 = 4.3786e-04
Loss = 8.8394e-03, PNorm = 141.8483, GNorm = 0.3133, lr_0 = 4.3756e-04
Loss = 1.1342e-02, PNorm = 141.8671, GNorm = 0.2600, lr_0 = 4.3726e-04
Loss = 8.3887e-03, PNorm = 141.8852, GNorm = 0.2469, lr_0 = 4.3696e-04
Loss = 1.1536e-02, PNorm = 141.9059, GNorm = 0.2427, lr_0 = 4.3667e-04
Loss = 7.8921e-03, PNorm = 141.9205, GNorm = 0.1318, lr_0 = 4.3637e-04
Loss = 8.3810e-03, PNorm = 141.9376, GNorm = 0.1776, lr_0 = 4.3607e-04
Loss = 7.9054e-03, PNorm = 141.9593, GNorm = 0.1759, lr_0 = 4.3577e-04
Loss = 9.2149e-03, PNorm = 141.9807, GNorm = 0.1585, lr_0 = 4.3547e-04
Loss = 8.1392e-03, PNorm = 142.0029, GNorm = 0.4529, lr_0 = 4.3517e-04
Loss = 7.9718e-03, PNorm = 142.0201, GNorm = 0.1174, lr_0 = 4.3487e-04
Loss = 1.0708e-02, PNorm = 142.0367, GNorm = 0.1737, lr_0 = 4.3458e-04
Loss = 8.5691e-03, PNorm = 142.0575, GNorm = 0.2726, lr_0 = 4.3428e-04
Loss = 9.2128e-03, PNorm = 142.0761, GNorm = 0.2365, lr_0 = 4.3398e-04
Loss = 8.5517e-03, PNorm = 142.0912, GNorm = 0.2854, lr_0 = 4.3368e-04
Loss = 7.8074e-03, PNorm = 142.1043, GNorm = 0.2549, lr_0 = 4.3339e-04
Loss = 7.7083e-03, PNorm = 142.1211, GNorm = 0.2020, lr_0 = 4.3309e-04
Loss = 9.5145e-03, PNorm = 142.1405, GNorm = 0.1995, lr_0 = 4.3279e-04
Loss = 7.7513e-03, PNorm = 142.1615, GNorm = 0.1078, lr_0 = 4.3250e-04
Loss = 7.8074e-03, PNorm = 142.1758, GNorm = 0.2660, lr_0 = 4.3220e-04
Loss = 7.7821e-03, PNorm = 142.1919, GNorm = 0.1786, lr_0 = 4.3190e-04
Loss = 8.0036e-03, PNorm = 142.2130, GNorm = 0.2787, lr_0 = 4.3161e-04
Loss = 9.3550e-03, PNorm = 142.2382, GNorm = 0.6322, lr_0 = 4.3131e-04
Loss = 9.6901e-03, PNorm = 142.2565, GNorm = 0.1936, lr_0 = 4.3102e-04
Loss = 7.6484e-03, PNorm = 142.2742, GNorm = 0.3202, lr_0 = 4.3072e-04
Loss = 1.0134e-02, PNorm = 142.2921, GNorm = 0.1089, lr_0 = 4.3043e-04
Loss = 7.0377e-03, PNorm = 142.3066, GNorm = 0.2325, lr_0 = 4.3013e-04
Loss = 8.8404e-03, PNorm = 142.3249, GNorm = 0.0702, lr_0 = 4.2984e-04
Loss = 1.1946e-02, PNorm = 142.3456, GNorm = 0.3161, lr_0 = 4.2954e-04
Loss = 8.0212e-03, PNorm = 142.3618, GNorm = 0.2222, lr_0 = 4.2925e-04
Loss = 7.8040e-03, PNorm = 142.3787, GNorm = 0.1511, lr_0 = 4.2895e-04
Loss = 9.0367e-03, PNorm = 142.3961, GNorm = 0.0969, lr_0 = 4.2866e-04
Loss = 7.9031e-03, PNorm = 142.4207, GNorm = 0.4364, lr_0 = 4.2837e-04
Loss = 8.8807e-03, PNorm = 142.4412, GNorm = 0.2193, lr_0 = 4.2807e-04
Loss = 8.8359e-03, PNorm = 142.4635, GNorm = 0.3674, lr_0 = 4.2778e-04
Loss = 9.0580e-03, PNorm = 142.4788, GNorm = 0.2491, lr_0 = 4.2749e-04
Loss = 7.1360e-03, PNorm = 142.4957, GNorm = 0.2365, lr_0 = 4.2719e-04
Loss = 7.0041e-03, PNorm = 142.5128, GNorm = 0.0706, lr_0 = 4.2690e-04
Loss = 1.0253e-02, PNorm = 142.5308, GNorm = 0.4817, lr_0 = 4.2661e-04
Loss = 7.7902e-03, PNorm = 142.5463, GNorm = 0.3759, lr_0 = 4.2632e-04
Loss = 7.2634e-03, PNorm = 142.5656, GNorm = 0.1922, lr_0 = 4.2602e-04
Loss = 8.3079e-03, PNorm = 142.5836, GNorm = 0.1748, lr_0 = 4.2573e-04
Loss = 1.0066e-02, PNorm = 142.5994, GNorm = 0.1217, lr_0 = 4.2544e-04
Loss = 9.8283e-03, PNorm = 142.6159, GNorm = 0.2384, lr_0 = 4.2515e-04
Loss = 9.0001e-03, PNorm = 142.6333, GNorm = 0.1508, lr_0 = 4.2486e-04
Loss = 9.7013e-03, PNorm = 142.6480, GNorm = 0.4765, lr_0 = 4.2457e-04
Loss = 7.1381e-03, PNorm = 142.6697, GNorm = 0.4355, lr_0 = 4.2428e-04
Loss = 9.5916e-03, PNorm = 142.6907, GNorm = 0.2771, lr_0 = 4.2399e-04
Loss = 9.4421e-03, PNorm = 142.7107, GNorm = 0.2083, lr_0 = 4.2370e-04
Loss = 7.6319e-03, PNorm = 142.7348, GNorm = 0.2763, lr_0 = 4.2340e-04
Loss = 9.5746e-03, PNorm = 142.7536, GNorm = 0.1434, lr_0 = 4.2311e-04
Loss = 8.7325e-03, PNorm = 142.7687, GNorm = 0.2144, lr_0 = 4.2283e-04
Loss = 9.6308e-03, PNorm = 142.7857, GNorm = 0.3747, lr_0 = 4.2254e-04
Loss = 9.9992e-03, PNorm = 142.8036, GNorm = 0.1883, lr_0 = 4.2225e-04
Loss = 7.3482e-03, PNorm = 142.8226, GNorm = 0.3103, lr_0 = 4.2196e-04
Loss = 8.3496e-03, PNorm = 142.8415, GNorm = 0.2561, lr_0 = 4.2167e-04
Loss = 1.2501e-02, PNorm = 142.8581, GNorm = 0.5533, lr_0 = 4.2138e-04
Loss = 8.8058e-03, PNorm = 142.8736, GNorm = 0.4560, lr_0 = 4.2109e-04
Loss = 7.3364e-03, PNorm = 142.8907, GNorm = 0.1136, lr_0 = 4.2080e-04
Loss = 7.3966e-03, PNorm = 142.9120, GNorm = 0.3622, lr_0 = 4.2051e-04
Loss = 6.8011e-03, PNorm = 142.9341, GNorm = 0.2201, lr_0 = 4.2023e-04
Loss = 7.4464e-03, PNorm = 142.9546, GNorm = 0.3285, lr_0 = 4.1994e-04
Loss = 9.3426e-03, PNorm = 142.9688, GNorm = 0.5124, lr_0 = 4.1965e-04
Loss = 9.2193e-03, PNorm = 142.9881, GNorm = 0.2305, lr_0 = 4.1936e-04
Loss = 7.9893e-03, PNorm = 143.0089, GNorm = 0.4169, lr_0 = 4.1907e-04
Loss = 7.5543e-03, PNorm = 143.0285, GNorm = 0.4167, lr_0 = 4.1879e-04
Loss = 9.1580e-03, PNorm = 143.0489, GNorm = 0.0930, lr_0 = 4.1850e-04
Loss = 1.1308e-02, PNorm = 143.0707, GNorm = 0.3714, lr_0 = 4.1821e-04
Loss = 8.9598e-03, PNorm = 143.0915, GNorm = 0.2839, lr_0 = 4.1793e-04
Loss = 7.7510e-03, PNorm = 143.1151, GNorm = 0.4829, lr_0 = 4.1764e-04
Loss = 9.5643e-03, PNorm = 143.1352, GNorm = 0.3207, lr_0 = 4.1736e-04
Loss = 1.0310e-02, PNorm = 143.1576, GNorm = 0.4730, lr_0 = 4.1707e-04
Loss = 8.3293e-03, PNorm = 143.1824, GNorm = 0.2667, lr_0 = 4.1678e-04
Loss = 7.1728e-03, PNorm = 143.2032, GNorm = 0.2935, lr_0 = 4.1650e-04
Loss = 1.3658e-02, PNorm = 143.2250, GNorm = 1.1299, lr_0 = 4.1621e-04
Loss = 8.6686e-03, PNorm = 143.2451, GNorm = 0.2836, lr_0 = 4.1593e-04
Loss = 9.3907e-03, PNorm = 143.2663, GNorm = 0.2401, lr_0 = 4.1564e-04
Loss = 9.0537e-03, PNorm = 143.2906, GNorm = 0.1564, lr_0 = 4.1536e-04
Loss = 8.4735e-03, PNorm = 143.3122, GNorm = 0.3738, lr_0 = 4.1507e-04
Loss = 9.4056e-03, PNorm = 143.3328, GNorm = 0.1683, lr_0 = 4.1479e-04
Loss = 7.8712e-03, PNorm = 143.3504, GNorm = 0.4240, lr_0 = 4.1450e-04
Loss = 1.1532e-02, PNorm = 143.3707, GNorm = 0.2650, lr_0 = 4.1422e-04
Loss = 8.9069e-03, PNorm = 143.3996, GNorm = 0.1236, lr_0 = 4.1394e-04
Loss = 7.7959e-03, PNorm = 143.4261, GNorm = 0.2600, lr_0 = 4.1365e-04
Loss = 6.9130e-03, PNorm = 143.4519, GNorm = 0.1284, lr_0 = 4.1337e-04
Loss = 8.3077e-03, PNorm = 143.4688, GNorm = 0.2360, lr_0 = 4.1309e-04
Loss = 6.3178e-03, PNorm = 143.4850, GNorm = 0.3629, lr_0 = 4.1280e-04
Loss = 8.4430e-03, PNorm = 143.5047, GNorm = 0.3661, lr_0 = 4.1252e-04
Loss = 8.0761e-03, PNorm = 143.5270, GNorm = 0.4215, lr_0 = 4.1224e-04
Loss = 8.3964e-03, PNorm = 143.5485, GNorm = 0.0766, lr_0 = 4.1196e-04
Loss = 7.6672e-03, PNorm = 143.5675, GNorm = 0.1872, lr_0 = 4.1167e-04
Loss = 9.3925e-03, PNorm = 143.5844, GNorm = 0.1706, lr_0 = 4.1139e-04
Loss = 7.7326e-03, PNorm = 143.6007, GNorm = 0.4680, lr_0 = 4.1111e-04
Loss = 7.0414e-03, PNorm = 143.6175, GNorm = 0.3557, lr_0 = 4.1083e-04
Loss = 7.4132e-03, PNorm = 143.6349, GNorm = 0.2408, lr_0 = 4.1055e-04
Loss = 9.4296e-03, PNorm = 143.6530, GNorm = 0.3815, lr_0 = 4.1027e-04
Loss = 8.7724e-03, PNorm = 143.6738, GNorm = 0.1669, lr_0 = 4.0998e-04
Loss = 7.8495e-03, PNorm = 143.6902, GNorm = 0.0881, lr_0 = 4.0970e-04
Loss = 8.0465e-03, PNorm = 143.7110, GNorm = 0.2406, lr_0 = 4.0942e-04
Loss = 7.1646e-03, PNorm = 143.7300, GNorm = 0.2973, lr_0 = 4.0914e-04
Loss = 7.8455e-03, PNorm = 143.7481, GNorm = 0.2445, lr_0 = 4.0886e-04
Loss = 1.0186e-02, PNorm = 143.7678, GNorm = 0.2209, lr_0 = 4.0858e-04
Loss = 8.2735e-03, PNorm = 143.7913, GNorm = 0.2714, lr_0 = 4.0830e-04
Loss = 9.5499e-03, PNorm = 143.8111, GNorm = 0.1565, lr_0 = 4.0802e-04
Loss = 6.7417e-03, PNorm = 143.8332, GNorm = 0.4355, lr_0 = 4.0774e-04
Loss = 8.3817e-03, PNorm = 143.8533, GNorm = 0.2718, lr_0 = 4.0746e-04
Loss = 7.4497e-03, PNorm = 143.8751, GNorm = 0.4042, lr_0 = 4.0718e-04
Loss = 8.0745e-03, PNorm = 143.8967, GNorm = 0.2677, lr_0 = 4.0691e-04
Loss = 7.0595e-03, PNorm = 143.9192, GNorm = 0.0928, lr_0 = 4.0663e-04
Loss = 8.0230e-03, PNorm = 143.9414, GNorm = 0.3057, lr_0 = 4.0635e-04
Loss = 8.3140e-03, PNorm = 143.9581, GNorm = 0.3457, lr_0 = 4.0607e-04
Loss = 9.0642e-03, PNorm = 143.9787, GNorm = 0.2493, lr_0 = 4.0579e-04
Loss = 8.9148e-03, PNorm = 143.9963, GNorm = 0.4803, lr_0 = 4.0551e-04
Loss = 8.3511e-03, PNorm = 144.0221, GNorm = 0.2575, lr_0 = 4.0524e-04
Loss = 8.9296e-03, PNorm = 144.0487, GNorm = 0.2005, lr_0 = 4.0496e-04
Loss = 1.1745e-02, PNorm = 144.0696, GNorm = 0.4160, lr_0 = 4.0468e-04
Validation mae = 0.479884
Epoch 13
Loss = 8.9998e-03, PNorm = 144.0867, GNorm = 0.4138, lr_0 = 4.0440e-04
Loss = 6.7452e-03, PNorm = 144.0982, GNorm = 0.3686, lr_0 = 4.0413e-04
Loss = 6.6537e-03, PNorm = 144.1069, GNorm = 0.1987, lr_0 = 4.0385e-04
Loss = 9.2959e-03, PNorm = 144.1236, GNorm = 0.2900, lr_0 = 4.0357e-04
Loss = 8.5001e-03, PNorm = 144.1398, GNorm = 0.2509, lr_0 = 4.0330e-04
Loss = 9.1088e-03, PNorm = 144.1589, GNorm = 0.2490, lr_0 = 4.0302e-04
Loss = 8.2843e-03, PNorm = 144.1771, GNorm = 0.2634, lr_0 = 4.0274e-04
Loss = 7.4101e-03, PNorm = 144.1935, GNorm = 0.2590, lr_0 = 4.0247e-04
Loss = 6.8455e-03, PNorm = 144.2035, GNorm = 0.1385, lr_0 = 4.0219e-04
Loss = 8.5990e-03, PNorm = 144.2123, GNorm = 0.3087, lr_0 = 4.0192e-04
Loss = 7.4183e-03, PNorm = 144.2288, GNorm = 0.2071, lr_0 = 4.0164e-04
Loss = 1.0029e-02, PNorm = 144.2466, GNorm = 0.4084, lr_0 = 4.0137e-04
Loss = 7.0787e-03, PNorm = 144.2638, GNorm = 0.2499, lr_0 = 4.0109e-04
Loss = 7.2988e-03, PNorm = 144.2780, GNorm = 0.3181, lr_0 = 4.0082e-04
Loss = 6.6041e-03, PNorm = 144.2926, GNorm = 0.1880, lr_0 = 4.0054e-04
Loss = 7.9284e-03, PNorm = 144.3054, GNorm = 0.3088, lr_0 = 4.0027e-04
Loss = 6.9958e-03, PNorm = 144.3230, GNorm = 0.2456, lr_0 = 3.9999e-04
Loss = 6.7974e-03, PNorm = 144.3388, GNorm = 0.1996, lr_0 = 3.9972e-04
Loss = 7.3931e-03, PNorm = 144.3553, GNorm = 0.1285, lr_0 = 3.9945e-04
Loss = 8.3182e-03, PNorm = 144.3739, GNorm = 0.1599, lr_0 = 3.9917e-04
Loss = 6.6601e-03, PNorm = 144.3876, GNorm = 0.2457, lr_0 = 3.9890e-04
Loss = 9.0772e-03, PNorm = 144.3999, GNorm = 0.3386, lr_0 = 3.9863e-04
Loss = 7.0154e-03, PNorm = 144.4106, GNorm = 0.3661, lr_0 = 3.9835e-04
Loss = 8.2350e-03, PNorm = 144.4264, GNorm = 0.3363, lr_0 = 3.9808e-04
Loss = 6.7169e-03, PNorm = 144.4412, GNorm = 0.2278, lr_0 = 3.9781e-04
Loss = 7.5978e-03, PNorm = 144.4584, GNorm = 0.1386, lr_0 = 3.9753e-04
Loss = 9.3125e-03, PNorm = 144.4727, GNorm = 0.2269, lr_0 = 3.9726e-04
Loss = 8.1965e-03, PNorm = 144.4923, GNorm = 0.0875, lr_0 = 3.9699e-04
Loss = 6.4167e-03, PNorm = 144.5134, GNorm = 0.1922, lr_0 = 3.9672e-04
Loss = 7.0343e-03, PNorm = 144.5297, GNorm = 0.3850, lr_0 = 3.9645e-04
Loss = 7.5831e-03, PNorm = 144.5449, GNorm = 0.1350, lr_0 = 3.9617e-04
Loss = 9.6334e-03, PNorm = 144.5602, GNorm = 0.6710, lr_0 = 3.9590e-04
Loss = 8.7566e-03, PNorm = 144.5759, GNorm = 0.2060, lr_0 = 3.9563e-04
Loss = 7.4267e-03, PNorm = 144.5917, GNorm = 0.4323, lr_0 = 3.9536e-04
Loss = 7.6234e-03, PNorm = 144.6087, GNorm = 0.1099, lr_0 = 3.9509e-04
Loss = 6.4965e-03, PNorm = 144.6213, GNorm = 0.2223, lr_0 = 3.9482e-04
Loss = 5.3861e-03, PNorm = 144.6327, GNorm = 0.0933, lr_0 = 3.9455e-04
Loss = 5.9447e-03, PNorm = 144.6443, GNorm = 0.1731, lr_0 = 3.9428e-04
Loss = 6.7728e-03, PNorm = 144.6586, GNorm = 0.2450, lr_0 = 3.9401e-04
Loss = 6.2064e-03, PNorm = 144.6797, GNorm = 0.1579, lr_0 = 3.9374e-04
Loss = 8.3658e-03, PNorm = 144.6974, GNorm = 0.0998, lr_0 = 3.9347e-04
Loss = 6.3055e-03, PNorm = 144.7129, GNorm = 0.3482, lr_0 = 3.9320e-04
Loss = 7.8577e-03, PNorm = 144.7302, GNorm = 0.2723, lr_0 = 3.9293e-04
Loss = 5.6145e-03, PNorm = 144.7440, GNorm = 0.3102, lr_0 = 3.9266e-04
Loss = 7.2608e-03, PNorm = 144.7616, GNorm = 0.2020, lr_0 = 3.9239e-04
Loss = 5.8071e-03, PNorm = 144.7821, GNorm = 0.0743, lr_0 = 3.9212e-04
Loss = 6.7188e-03, PNorm = 144.7974, GNorm = 0.3083, lr_0 = 3.9185e-04
Loss = 1.0817e-02, PNorm = 144.8097, GNorm = 0.2665, lr_0 = 3.9159e-04
Loss = 7.9395e-03, PNorm = 144.8262, GNorm = 0.3787, lr_0 = 3.9132e-04
Loss = 8.3437e-03, PNorm = 144.8414, GNorm = 0.4371, lr_0 = 3.9105e-04
Loss = 7.0173e-03, PNorm = 144.8576, GNorm = 0.4770, lr_0 = 3.9078e-04
Loss = 6.7173e-03, PNorm = 144.8714, GNorm = 0.1982, lr_0 = 3.9051e-04
Loss = 6.8552e-03, PNorm = 144.8866, GNorm = 0.3842, lr_0 = 3.9025e-04
Loss = 7.4642e-03, PNorm = 144.9052, GNorm = 0.2389, lr_0 = 3.8998e-04
Loss = 6.7875e-03, PNorm = 144.9205, GNorm = 0.0930, lr_0 = 3.8971e-04
Loss = 6.7320e-03, PNorm = 144.9378, GNorm = 0.1850, lr_0 = 3.8945e-04
Loss = 7.2004e-03, PNorm = 144.9573, GNorm = 0.1623, lr_0 = 3.8918e-04
Loss = 7.6443e-03, PNorm = 144.9775, GNorm = 0.2116, lr_0 = 3.8891e-04
Loss = 6.6084e-03, PNorm = 144.9934, GNorm = 0.1622, lr_0 = 3.8865e-04
Loss = 6.9824e-03, PNorm = 145.0120, GNorm = 0.2964, lr_0 = 3.8838e-04
Loss = 8.4872e-03, PNorm = 145.0265, GNorm = 0.2377, lr_0 = 3.8811e-04
Loss = 6.5673e-03, PNorm = 145.0407, GNorm = 0.0888, lr_0 = 3.8785e-04
Loss = 5.7296e-03, PNorm = 145.0560, GNorm = 0.3749, lr_0 = 3.8758e-04
Loss = 6.5949e-03, PNorm = 145.0710, GNorm = 0.1201, lr_0 = 3.8732e-04
Loss = 7.4915e-03, PNorm = 145.0842, GNorm = 0.2051, lr_0 = 3.8705e-04
Loss = 7.6577e-03, PNorm = 145.1036, GNorm = 0.4082, lr_0 = 3.8679e-04
Loss = 6.6635e-03, PNorm = 145.1195, GNorm = 0.1669, lr_0 = 3.8652e-04
Loss = 8.3291e-03, PNorm = 145.1365, GNorm = 0.2304, lr_0 = 3.8626e-04
Loss = 5.8947e-03, PNorm = 145.1565, GNorm = 0.1143, lr_0 = 3.8599e-04
Loss = 6.3038e-03, PNorm = 145.1727, GNorm = 0.1323, lr_0 = 3.8573e-04
Loss = 8.0399e-03, PNorm = 145.1888, GNorm = 0.2511, lr_0 = 3.8546e-04
Loss = 8.3417e-03, PNorm = 145.2062, GNorm = 0.2402, lr_0 = 3.8520e-04
Loss = 1.0545e-02, PNorm = 145.2223, GNorm = 0.3735, lr_0 = 3.8493e-04
Loss = 6.6009e-03, PNorm = 145.2356, GNorm = 0.3914, lr_0 = 3.8467e-04
Loss = 7.7718e-03, PNorm = 145.2543, GNorm = 0.3331, lr_0 = 3.8441e-04
Loss = 6.3060e-03, PNorm = 145.2730, GNorm = 0.2797, lr_0 = 3.8414e-04
Loss = 7.6767e-03, PNorm = 145.2927, GNorm = 0.3672, lr_0 = 3.8388e-04
Loss = 6.4085e-03, PNorm = 145.3113, GNorm = 0.3235, lr_0 = 3.8362e-04
Loss = 7.5814e-03, PNorm = 145.3274, GNorm = 0.6119, lr_0 = 3.8336e-04
Loss = 6.7621e-03, PNorm = 145.3436, GNorm = 0.1972, lr_0 = 3.8309e-04
Loss = 7.5173e-03, PNorm = 145.3645, GNorm = 0.1732, lr_0 = 3.8283e-04
Loss = 7.4913e-03, PNorm = 145.3844, GNorm = 0.0993, lr_0 = 3.8257e-04
Loss = 7.6523e-03, PNorm = 145.3997, GNorm = 0.0791, lr_0 = 3.8231e-04
Loss = 6.5507e-03, PNorm = 145.4165, GNorm = 0.4104, lr_0 = 3.8204e-04
Loss = 6.6492e-03, PNorm = 145.4345, GNorm = 0.3274, lr_0 = 3.8178e-04
Loss = 6.6485e-03, PNorm = 145.4560, GNorm = 0.3800, lr_0 = 3.8152e-04
Loss = 8.2518e-03, PNorm = 145.4737, GNorm = 0.1770, lr_0 = 3.8126e-04
Loss = 7.5852e-03, PNorm = 145.4952, GNorm = 0.4082, lr_0 = 3.8100e-04
Loss = 7.1400e-03, PNorm = 145.5072, GNorm = 0.2537, lr_0 = 3.8074e-04
Loss = 9.9111e-03, PNorm = 145.5253, GNorm = 0.2073, lr_0 = 3.8048e-04
Loss = 1.1672e-02, PNorm = 145.5404, GNorm = 0.1871, lr_0 = 3.8022e-04
Loss = 6.7927e-03, PNorm = 145.5590, GNorm = 0.1773, lr_0 = 3.7995e-04
Loss = 9.9137e-03, PNorm = 145.5748, GNorm = 0.2012, lr_0 = 3.7969e-04
Loss = 7.5043e-03, PNorm = 145.5915, GNorm = 0.0723, lr_0 = 3.7943e-04
Loss = 6.0379e-03, PNorm = 145.6132, GNorm = 0.2989, lr_0 = 3.7917e-04
Loss = 8.3919e-03, PNorm = 145.6287, GNorm = 0.1760, lr_0 = 3.7891e-04
Loss = 6.9771e-03, PNorm = 145.6514, GNorm = 0.2041, lr_0 = 3.7866e-04
Loss = 6.7875e-03, PNorm = 145.6649, GNorm = 0.0633, lr_0 = 3.7840e-04
Loss = 7.4761e-03, PNorm = 145.6798, GNorm = 0.1757, lr_0 = 3.7814e-04
Loss = 6.2382e-03, PNorm = 145.6952, GNorm = 0.1499, lr_0 = 3.7788e-04
Loss = 7.9884e-03, PNorm = 145.7123, GNorm = 0.3253, lr_0 = 3.7762e-04
Loss = 8.1323e-03, PNorm = 145.7274, GNorm = 0.0661, lr_0 = 3.7736e-04
Loss = 5.3977e-03, PNorm = 145.7443, GNorm = 0.2002, lr_0 = 3.7710e-04
Loss = 6.5920e-03, PNorm = 145.7626, GNorm = 0.1700, lr_0 = 3.7684e-04
Loss = 8.1904e-03, PNorm = 145.7827, GNorm = 0.4191, lr_0 = 3.7659e-04
Loss = 6.1525e-03, PNorm = 145.7981, GNorm = 0.3533, lr_0 = 3.7633e-04
Loss = 9.5283e-03, PNorm = 145.8174, GNorm = 0.1370, lr_0 = 3.7607e-04
Loss = 7.8532e-03, PNorm = 145.8337, GNorm = 0.3261, lr_0 = 3.7581e-04
Loss = 7.4395e-03, PNorm = 145.8535, GNorm = 0.3354, lr_0 = 3.7555e-04
Loss = 6.9109e-03, PNorm = 145.8688, GNorm = 0.1948, lr_0 = 3.7530e-04
Loss = 6.4282e-03, PNorm = 145.8844, GNorm = 0.2713, lr_0 = 3.7504e-04
Loss = 7.3014e-03, PNorm = 145.8999, GNorm = 0.3792, lr_0 = 3.7478e-04
Loss = 6.2069e-03, PNorm = 145.9210, GNorm = 0.1210, lr_0 = 3.7453e-04
Loss = 6.7765e-03, PNorm = 145.9398, GNorm = 0.1335, lr_0 = 3.7427e-04
Loss = 5.9664e-03, PNorm = 145.9583, GNorm = 0.0674, lr_0 = 3.7401e-04
Loss = 6.9101e-03, PNorm = 145.9757, GNorm = 0.1424, lr_0 = 3.7376e-04
Loss = 6.1749e-03, PNorm = 145.9927, GNorm = 0.4551, lr_0 = 3.7350e-04
Loss = 8.4779e-03, PNorm = 146.0153, GNorm = 0.3985, lr_0 = 3.7325e-04
Loss = 5.6427e-03, PNorm = 146.0324, GNorm = 0.1503, lr_0 = 3.7299e-04
Loss = 7.9048e-03, PNorm = 146.0443, GNorm = 0.3311, lr_0 = 3.7273e-04
Validation mae = 0.479406
Epoch 14
Loss = 6.7523e-03, PNorm = 146.0585, GNorm = 0.1645, lr_0 = 3.7248e-04
Loss = 6.8709e-03, PNorm = 146.0688, GNorm = 0.1922, lr_0 = 3.7222e-04
Loss = 6.2106e-03, PNorm = 146.0819, GNorm = 0.1134, lr_0 = 3.7197e-04
Loss = 9.4438e-03, PNorm = 146.0971, GNorm = 0.2573, lr_0 = 3.7171e-04
Loss = 7.5288e-03, PNorm = 146.1171, GNorm = 0.1990, lr_0 = 3.7146e-04
Loss = 6.8849e-03, PNorm = 146.1294, GNorm = 0.4668, lr_0 = 3.7120e-04
Loss = 6.7342e-03, PNorm = 146.1404, GNorm = 0.2222, lr_0 = 3.7095e-04
Loss = 6.8788e-03, PNorm = 146.1517, GNorm = 0.1300, lr_0 = 3.7070e-04
Loss = 6.2707e-03, PNorm = 146.1616, GNorm = 0.1409, lr_0 = 3.7044e-04
Loss = 7.7979e-03, PNorm = 146.1754, GNorm = 0.9334, lr_0 = 3.7019e-04
Loss = 5.4287e-03, PNorm = 146.1873, GNorm = 0.0966, lr_0 = 3.6993e-04
Loss = 6.1170e-03, PNorm = 146.1998, GNorm = 0.3980, lr_0 = 3.6968e-04
Loss = 6.4165e-03, PNorm = 146.2105, GNorm = 0.1235, lr_0 = 3.6943e-04
Loss = 5.5074e-03, PNorm = 146.2224, GNorm = 0.1233, lr_0 = 3.6917e-04
Loss = 6.8678e-03, PNorm = 146.2327, GNorm = 0.2259, lr_0 = 3.6892e-04
Loss = 5.3141e-03, PNorm = 146.2462, GNorm = 0.2140, lr_0 = 3.6867e-04
Loss = 5.2127e-03, PNorm = 146.2605, GNorm = 0.5021, lr_0 = 3.6842e-04
Loss = 5.2796e-03, PNorm = 146.2712, GNorm = 0.3281, lr_0 = 3.6816e-04
Loss = 6.0996e-03, PNorm = 146.2833, GNorm = 0.2392, lr_0 = 3.6791e-04
Loss = 7.4885e-03, PNorm = 146.2959, GNorm = 0.7957, lr_0 = 3.6766e-04
Loss = 7.7705e-03, PNorm = 146.3127, GNorm = 0.4295, lr_0 = 3.6741e-04
Loss = 6.6481e-03, PNorm = 146.3257, GNorm = 0.0907, lr_0 = 3.6716e-04
Loss = 5.1331e-03, PNorm = 146.3408, GNorm = 0.1681, lr_0 = 3.6690e-04
Loss = 6.9030e-03, PNorm = 146.3546, GNorm = 0.2601, lr_0 = 3.6665e-04
Loss = 5.6304e-03, PNorm = 146.3685, GNorm = 0.2548, lr_0 = 3.6640e-04
Loss = 7.2635e-03, PNorm = 146.3835, GNorm = 0.2513, lr_0 = 3.6615e-04
Loss = 8.2018e-03, PNorm = 146.3951, GNorm = 0.1805, lr_0 = 3.6590e-04
Loss = 5.6812e-03, PNorm = 146.4087, GNorm = 0.3263, lr_0 = 3.6565e-04
Loss = 5.9691e-03, PNorm = 146.4214, GNorm = 0.2300, lr_0 = 3.6540e-04
Loss = 4.7426e-03, PNorm = 146.4332, GNorm = 0.0837, lr_0 = 3.6515e-04
Loss = 4.5367e-03, PNorm = 146.4430, GNorm = 0.2060, lr_0 = 3.6490e-04
Loss = 4.8629e-03, PNorm = 146.4528, GNorm = 0.1039, lr_0 = 3.6465e-04
Loss = 4.7711e-03, PNorm = 146.4615, GNorm = 0.2335, lr_0 = 3.6440e-04
Loss = 5.7213e-03, PNorm = 146.4696, GNorm = 0.1197, lr_0 = 3.6415e-04
Loss = 5.2928e-03, PNorm = 146.4773, GNorm = 0.2724, lr_0 = 3.6390e-04
Loss = 6.3221e-03, PNorm = 146.4888, GNorm = 0.3031, lr_0 = 3.6365e-04
Loss = 6.4938e-03, PNorm = 146.5026, GNorm = 0.3488, lr_0 = 3.6340e-04
Loss = 5.7452e-03, PNorm = 146.5186, GNorm = 0.1140, lr_0 = 3.6315e-04
Loss = 5.7669e-03, PNorm = 146.5342, GNorm = 0.3095, lr_0 = 3.6290e-04
Loss = 9.4223e-03, PNorm = 146.5506, GNorm = 0.0579, lr_0 = 3.6266e-04
Loss = 6.1186e-03, PNorm = 146.5595, GNorm = 0.3885, lr_0 = 3.6241e-04
Loss = 4.7644e-03, PNorm = 146.5717, GNorm = 0.1892, lr_0 = 3.6216e-04
Loss = 5.3385e-03, PNorm = 146.5866, GNorm = 0.2127, lr_0 = 3.6191e-04
Loss = 6.1854e-03, PNorm = 146.6006, GNorm = 0.0677, lr_0 = 3.6166e-04
Loss = 5.8233e-03, PNorm = 146.6123, GNorm = 0.2687, lr_0 = 3.6141e-04
Loss = 5.2543e-03, PNorm = 146.6248, GNorm = 0.4234, lr_0 = 3.6117e-04
Loss = 4.9718e-03, PNorm = 146.6340, GNorm = 0.0731, lr_0 = 3.6092e-04
Loss = 6.3836e-03, PNorm = 146.6472, GNorm = 0.2935, lr_0 = 3.6067e-04
Loss = 5.8124e-03, PNorm = 146.6626, GNorm = 0.1233, lr_0 = 3.6043e-04
Loss = 6.6042e-03, PNorm = 146.6789, GNorm = 0.1323, lr_0 = 3.6018e-04
Loss = 5.7905e-03, PNorm = 146.6916, GNorm = 0.2384, lr_0 = 3.5993e-04
Loss = 4.6692e-03, PNorm = 146.7030, GNorm = 0.1622, lr_0 = 3.5969e-04
Loss = 6.0345e-03, PNorm = 146.7172, GNorm = 0.2003, lr_0 = 3.5944e-04
Loss = 5.1929e-03, PNorm = 146.7320, GNorm = 0.1061, lr_0 = 3.5919e-04
Loss = 6.1448e-03, PNorm = 146.7523, GNorm = 0.2701, lr_0 = 3.5895e-04
Loss = 6.5884e-03, PNorm = 146.7675, GNorm = 0.2500, lr_0 = 3.5870e-04
Loss = 7.1314e-03, PNorm = 146.7815, GNorm = 0.1266, lr_0 = 3.5845e-04
Loss = 5.2537e-03, PNorm = 146.7916, GNorm = 0.4497, lr_0 = 3.5821e-04
Loss = 6.1026e-03, PNorm = 146.8016, GNorm = 0.4957, lr_0 = 3.5796e-04
Loss = 5.8782e-03, PNorm = 146.8139, GNorm = 0.1658, lr_0 = 3.5772e-04
Loss = 5.6322e-03, PNorm = 146.8301, GNorm = 0.2270, lr_0 = 3.5747e-04
Loss = 5.0527e-03, PNorm = 146.8488, GNorm = 0.1456, lr_0 = 3.5723e-04
Loss = 4.4862e-03, PNorm = 146.8624, GNorm = 0.2463, lr_0 = 3.5698e-04
Loss = 5.9844e-03, PNorm = 146.8709, GNorm = 0.1526, lr_0 = 3.5674e-04
Loss = 5.0793e-03, PNorm = 146.8806, GNorm = 0.0620, lr_0 = 3.5650e-04
Loss = 6.1844e-03, PNorm = 146.8918, GNorm = 0.1560, lr_0 = 3.5625e-04
Loss = 4.5429e-03, PNorm = 146.9046, GNorm = 0.1830, lr_0 = 3.5601e-04
Loss = 6.3985e-03, PNorm = 146.9140, GNorm = 0.0894, lr_0 = 3.5576e-04
Loss = 6.0055e-03, PNorm = 146.9240, GNorm = 0.2414, lr_0 = 3.5552e-04
Loss = 5.1646e-03, PNorm = 146.9364, GNorm = 0.1351, lr_0 = 3.5528e-04
Loss = 5.9857e-03, PNorm = 146.9544, GNorm = 0.2350, lr_0 = 3.5503e-04
Loss = 5.5619e-03, PNorm = 146.9668, GNorm = 0.1153, lr_0 = 3.5479e-04
Loss = 8.3214e-03, PNorm = 146.9764, GNorm = 0.1926, lr_0 = 3.5455e-04
Loss = 5.8320e-03, PNorm = 146.9898, GNorm = 0.3240, lr_0 = 3.5430e-04
Loss = 5.7436e-03, PNorm = 147.0024, GNorm = 0.1974, lr_0 = 3.5406e-04
Loss = 6.8361e-03, PNorm = 147.0198, GNorm = 0.3139, lr_0 = 3.5382e-04
Loss = 8.7195e-03, PNorm = 147.0349, GNorm = 0.1857, lr_0 = 3.5358e-04
Loss = 6.6027e-03, PNorm = 147.0482, GNorm = 0.1122, lr_0 = 3.5333e-04
Loss = 5.7907e-03, PNorm = 147.0633, GNorm = 0.1158, lr_0 = 3.5309e-04
Loss = 5.8306e-03, PNorm = 147.0753, GNorm = 0.2920, lr_0 = 3.5285e-04
Loss = 6.5201e-03, PNorm = 147.0864, GNorm = 0.1693, lr_0 = 3.5261e-04
Loss = 7.0418e-03, PNorm = 147.0996, GNorm = 0.1591, lr_0 = 3.5237e-04
Loss = 6.8176e-03, PNorm = 147.1146, GNorm = 0.2508, lr_0 = 3.5212e-04
Loss = 4.6122e-03, PNorm = 147.1280, GNorm = 0.1806, lr_0 = 3.5188e-04
Loss = 5.7118e-03, PNorm = 147.1407, GNorm = 0.1345, lr_0 = 3.5164e-04
Loss = 6.1469e-03, PNorm = 147.1533, GNorm = 0.2427, lr_0 = 3.5140e-04
Loss = 5.0887e-03, PNorm = 147.1634, GNorm = 0.1792, lr_0 = 3.5116e-04
Loss = 6.8629e-03, PNorm = 147.1775, GNorm = 0.3517, lr_0 = 3.5092e-04
Loss = 6.4172e-03, PNorm = 147.1918, GNorm = 0.3836, lr_0 = 3.5068e-04
Loss = 6.9580e-03, PNorm = 147.2040, GNorm = 0.3452, lr_0 = 3.5044e-04
Loss = 7.6238e-03, PNorm = 147.2170, GNorm = 0.2580, lr_0 = 3.5020e-04
Loss = 8.8383e-03, PNorm = 147.2328, GNorm = 0.1488, lr_0 = 3.4996e-04
Loss = 6.3723e-03, PNorm = 147.2460, GNorm = 0.1709, lr_0 = 3.4972e-04
Loss = 9.7462e-03, PNorm = 147.2593, GNorm = 0.1464, lr_0 = 3.4948e-04
Loss = 6.3953e-03, PNorm = 147.2718, GNorm = 0.3088, lr_0 = 3.4924e-04
Loss = 6.7841e-03, PNorm = 147.2814, GNorm = 0.1504, lr_0 = 3.4900e-04
Loss = 6.3146e-03, PNorm = 147.2960, GNorm = 0.2422, lr_0 = 3.4876e-04
Loss = 5.3746e-03, PNorm = 147.3124, GNorm = 0.2024, lr_0 = 3.4852e-04
Loss = 5.6290e-03, PNorm = 147.3288, GNorm = 0.2032, lr_0 = 3.4828e-04
Loss = 4.5680e-03, PNorm = 147.3416, GNorm = 0.0688, lr_0 = 3.4805e-04
Loss = 6.9092e-03, PNorm = 147.3555, GNorm = 0.3447, lr_0 = 3.4781e-04
Loss = 5.7925e-03, PNorm = 147.3699, GNorm = 0.3018, lr_0 = 3.4757e-04
Loss = 6.5353e-03, PNorm = 147.3877, GNorm = 0.2259, lr_0 = 3.4733e-04
Loss = 4.1432e-03, PNorm = 147.4045, GNorm = 0.0738, lr_0 = 3.4709e-04
Loss = 5.2328e-03, PNorm = 147.4169, GNorm = 0.1381, lr_0 = 3.4686e-04
Loss = 6.7183e-03, PNorm = 147.4313, GNorm = 0.4106, lr_0 = 3.4662e-04
Loss = 7.2900e-03, PNorm = 147.4453, GNorm = 0.2249, lr_0 = 3.4638e-04
Loss = 5.2134e-03, PNorm = 147.4587, GNorm = 0.1742, lr_0 = 3.4614e-04
Loss = 5.4694e-03, PNorm = 147.4742, GNorm = 0.1810, lr_0 = 3.4591e-04
Loss = 6.0863e-03, PNorm = 147.4841, GNorm = 0.1302, lr_0 = 3.4567e-04
Loss = 5.0163e-03, PNorm = 147.4962, GNorm = 0.0979, lr_0 = 3.4543e-04
Loss = 7.1307e-03, PNorm = 147.5075, GNorm = 0.1731, lr_0 = 3.4520e-04
Loss = 6.9138e-03, PNorm = 147.5203, GNorm = 0.1024, lr_0 = 3.4496e-04
Loss = 5.9518e-03, PNorm = 147.5376, GNorm = 0.2028, lr_0 = 3.4472e-04
Loss = 6.0909e-03, PNorm = 147.5543, GNorm = 0.1270, lr_0 = 3.4449e-04
Loss = 6.5677e-03, PNorm = 147.5727, GNorm = 0.4416, lr_0 = 3.4425e-04
Loss = 6.2179e-03, PNorm = 147.5866, GNorm = 0.1731, lr_0 = 3.4402e-04
Loss = 5.4503e-03, PNorm = 147.6001, GNorm = 0.2086, lr_0 = 3.4378e-04
Loss = 6.1154e-03, PNorm = 147.6127, GNorm = 0.3772, lr_0 = 3.4354e-04
Loss = 7.4750e-03, PNorm = 147.6244, GNorm = 0.0672, lr_0 = 3.4331e-04
Validation mae = 0.477554
Epoch 15
Loss = 5.3629e-03, PNorm = 147.6394, GNorm = 0.1724, lr_0 = 3.4307e-04
Loss = 4.8595e-03, PNorm = 147.6496, GNorm = 0.1548, lr_0 = 3.4284e-04
Loss = 4.4346e-03, PNorm = 147.6591, GNorm = 0.1571, lr_0 = 3.4260e-04
Loss = 6.8874e-03, PNorm = 147.6667, GNorm = 0.2640, lr_0 = 3.4237e-04
Loss = 5.5525e-03, PNorm = 147.6738, GNorm = 0.1777, lr_0 = 3.4213e-04
Loss = 4.8985e-03, PNorm = 147.6850, GNorm = 0.1994, lr_0 = 3.4190e-04
Loss = 5.7507e-03, PNorm = 147.6978, GNorm = 0.1274, lr_0 = 3.4167e-04
Loss = 6.5752e-03, PNorm = 147.7123, GNorm = 0.1417, lr_0 = 3.4143e-04
Loss = 6.3670e-03, PNorm = 147.7223, GNorm = 0.2627, lr_0 = 3.4120e-04
Loss = 5.1575e-03, PNorm = 147.7315, GNorm = 0.1436, lr_0 = 3.4096e-04
Loss = 4.7245e-03, PNorm = 147.7432, GNorm = 0.2631, lr_0 = 3.4073e-04
Loss = 4.4986e-03, PNorm = 147.7529, GNorm = 0.1761, lr_0 = 3.4050e-04
Loss = 4.8715e-03, PNorm = 147.7664, GNorm = 0.2274, lr_0 = 3.4026e-04
Loss = 5.3365e-03, PNorm = 147.7776, GNorm = 0.1946, lr_0 = 3.4003e-04
Loss = 4.6643e-03, PNorm = 147.7895, GNorm = 0.1552, lr_0 = 3.3980e-04
Loss = 8.6709e-03, PNorm = 147.7988, GNorm = 0.1788, lr_0 = 3.3956e-04
Loss = 6.9177e-03, PNorm = 147.8044, GNorm = 0.2204, lr_0 = 3.3933e-04
Loss = 5.3403e-03, PNorm = 147.8147, GNorm = 0.1916, lr_0 = 3.3910e-04
Loss = 5.2511e-03, PNorm = 147.8280, GNorm = 0.1276, lr_0 = 3.3887e-04
Loss = 6.4300e-03, PNorm = 147.8408, GNorm = 0.1087, lr_0 = 3.3864e-04
Loss = 3.9149e-03, PNorm = 147.8575, GNorm = 0.1472, lr_0 = 3.3840e-04
Loss = 4.2247e-03, PNorm = 147.8703, GNorm = 0.2217, lr_0 = 3.3817e-04
Loss = 6.7609e-03, PNorm = 147.8790, GNorm = 0.2790, lr_0 = 3.3794e-04
Loss = 4.3032e-03, PNorm = 147.8879, GNorm = 0.0938, lr_0 = 3.3771e-04
Loss = 7.1453e-03, PNorm = 147.9025, GNorm = 0.3345, lr_0 = 3.3748e-04
Loss = 5.2718e-03, PNorm = 147.9127, GNorm = 0.4583, lr_0 = 3.3725e-04
Loss = 4.7109e-03, PNorm = 147.9230, GNorm = 0.3574, lr_0 = 3.3701e-04
Loss = 4.9702e-03, PNorm = 147.9367, GNorm = 0.2666, lr_0 = 3.3678e-04
Loss = 6.3411e-03, PNorm = 147.9490, GNorm = 0.3053, lr_0 = 3.3655e-04
Loss = 4.7663e-03, PNorm = 147.9626, GNorm = 0.2686, lr_0 = 3.3632e-04
Loss = 5.7870e-03, PNorm = 147.9773, GNorm = 0.0834, lr_0 = 3.3609e-04
Loss = 5.0229e-03, PNorm = 147.9906, GNorm = 0.3908, lr_0 = 3.3586e-04
Loss = 5.4612e-03, PNorm = 148.0003, GNorm = 0.4842, lr_0 = 3.3563e-04
Loss = 6.5372e-03, PNorm = 148.0095, GNorm = 0.0991, lr_0 = 3.3540e-04
Loss = 4.5067e-03, PNorm = 148.0197, GNorm = 0.0852, lr_0 = 3.3517e-04
Loss = 4.3942e-03, PNorm = 148.0317, GNorm = 0.2069, lr_0 = 3.3494e-04
Loss = 4.6086e-03, PNorm = 148.0408, GNorm = 0.1714, lr_0 = 3.3471e-04
Loss = 4.4175e-03, PNorm = 148.0484, GNorm = 0.4067, lr_0 = 3.3448e-04
Loss = 4.1547e-03, PNorm = 148.0597, GNorm = 0.4511, lr_0 = 3.3425e-04
Loss = 5.7044e-03, PNorm = 148.0689, GNorm = 0.3129, lr_0 = 3.3403e-04
Loss = 5.6680e-03, PNorm = 148.0793, GNorm = 0.3314, lr_0 = 3.3380e-04
Loss = 5.7760e-03, PNorm = 148.0930, GNorm = 0.2061, lr_0 = 3.3357e-04
Loss = 6.7671e-03, PNorm = 148.1051, GNorm = 0.1592, lr_0 = 3.3334e-04
Loss = 4.5321e-03, PNorm = 148.1146, GNorm = 0.0818, lr_0 = 3.3311e-04
Loss = 5.5332e-03, PNorm = 148.1248, GNorm = 0.1060, lr_0 = 3.3288e-04
Loss = 4.7057e-03, PNorm = 148.1381, GNorm = 0.4785, lr_0 = 3.3265e-04
Loss = 6.1134e-03, PNorm = 148.1465, GNorm = 0.3355, lr_0 = 3.3243e-04
Loss = 5.9478e-03, PNorm = 148.1586, GNorm = 0.1762, lr_0 = 3.3220e-04
Loss = 5.8116e-03, PNorm = 148.1718, GNorm = 0.2210, lr_0 = 3.3197e-04
Loss = 4.2656e-03, PNorm = 148.1833, GNorm = 0.1439, lr_0 = 3.3174e-04
Loss = 5.3097e-03, PNorm = 148.1962, GNorm = 0.2032, lr_0 = 3.3152e-04
Loss = 5.5724e-03, PNorm = 148.2114, GNorm = 0.1911, lr_0 = 3.3129e-04
Loss = 5.4476e-03, PNorm = 148.2269, GNorm = 0.2461, lr_0 = 3.3106e-04
Loss = 7.2176e-03, PNorm = 148.2415, GNorm = 0.1726, lr_0 = 3.3084e-04
Loss = 4.3058e-03, PNorm = 148.2568, GNorm = 0.2263, lr_0 = 3.3061e-04
Loss = 4.6426e-03, PNorm = 148.2684, GNorm = 0.1934, lr_0 = 3.3038e-04
Loss = 4.9525e-03, PNorm = 148.2817, GNorm = 0.2693, lr_0 = 3.3016e-04
Loss = 6.1972e-03, PNorm = 148.2948, GNorm = 0.1531, lr_0 = 3.2993e-04
Loss = 5.3296e-03, PNorm = 148.3070, GNorm = 0.2476, lr_0 = 3.2970e-04
Loss = 4.2367e-03, PNorm = 148.3177, GNorm = 0.1975, lr_0 = 3.2948e-04
Loss = 5.1012e-03, PNorm = 148.3251, GNorm = 0.2609, lr_0 = 3.2925e-04
Loss = 4.7437e-03, PNorm = 148.3325, GNorm = 0.1983, lr_0 = 3.2903e-04
Loss = 4.8578e-03, PNorm = 148.3439, GNorm = 0.3440, lr_0 = 3.2880e-04
Loss = 4.5080e-03, PNorm = 148.3588, GNorm = 0.0724, lr_0 = 3.2858e-04
Loss = 6.6063e-03, PNorm = 148.3672, GNorm = 0.4146, lr_0 = 3.2835e-04
Loss = 5.5069e-03, PNorm = 148.3780, GNorm = 0.1997, lr_0 = 3.2813e-04
Loss = 4.1783e-03, PNorm = 148.3889, GNorm = 0.3002, lr_0 = 3.2790e-04
Loss = 4.5052e-03, PNorm = 148.4008, GNorm = 0.0740, lr_0 = 3.2768e-04
Loss = 7.2666e-03, PNorm = 148.4126, GNorm = 0.1077, lr_0 = 3.2745e-04
Loss = 4.4461e-03, PNorm = 148.4238, GNorm = 0.1099, lr_0 = 3.2723e-04
Loss = 4.7685e-03, PNorm = 148.4332, GNorm = 0.2156, lr_0 = 3.2700e-04
Loss = 4.6741e-03, PNorm = 148.4440, GNorm = 0.3483, lr_0 = 3.2678e-04
Loss = 5.8740e-03, PNorm = 148.4547, GNorm = 0.0907, lr_0 = 3.2656e-04
Loss = 5.1541e-03, PNorm = 148.4657, GNorm = 0.2316, lr_0 = 3.2633e-04
Loss = 4.0163e-03, PNorm = 148.4738, GNorm = 0.2393, lr_0 = 3.2611e-04
Loss = 4.6947e-03, PNorm = 148.4862, GNorm = 0.5509, lr_0 = 3.2589e-04
Loss = 4.7951e-03, PNorm = 148.4992, GNorm = 0.1521, lr_0 = 3.2566e-04
Loss = 4.3200e-03, PNorm = 148.5105, GNorm = 0.2002, lr_0 = 3.2544e-04
Loss = 5.5357e-03, PNorm = 148.5227, GNorm = 0.1344, lr_0 = 3.2522e-04
Loss = 4.7669e-03, PNorm = 148.5377, GNorm = 0.2196, lr_0 = 3.2499e-04
Loss = 5.1386e-03, PNorm = 148.5523, GNorm = 0.1328, lr_0 = 3.2477e-04
Loss = 4.6595e-03, PNorm = 148.5684, GNorm = 0.1640, lr_0 = 3.2455e-04
Loss = 4.2692e-03, PNorm = 148.5782, GNorm = 0.1237, lr_0 = 3.2433e-04
Loss = 4.7302e-03, PNorm = 148.5887, GNorm = 0.1216, lr_0 = 3.2410e-04
Loss = 5.6941e-03, PNorm = 148.6019, GNorm = 0.1260, lr_0 = 3.2388e-04
Loss = 3.9554e-03, PNorm = 148.6154, GNorm = 0.2733, lr_0 = 3.2366e-04
Loss = 6.3220e-03, PNorm = 148.6277, GNorm = 0.2331, lr_0 = 3.2344e-04
Loss = 5.2620e-03, PNorm = 148.6429, GNorm = 0.0741, lr_0 = 3.2322e-04
Loss = 5.1611e-03, PNorm = 148.6543, GNorm = 0.1991, lr_0 = 3.2300e-04
Loss = 6.4361e-03, PNorm = 148.6608, GNorm = 0.2136, lr_0 = 3.2277e-04
Loss = 5.7142e-03, PNorm = 148.6664, GNorm = 0.1931, lr_0 = 3.2255e-04
Loss = 5.4778e-03, PNorm = 148.6736, GNorm = 0.2021, lr_0 = 3.2233e-04
Loss = 7.2625e-03, PNorm = 148.6860, GNorm = 0.3799, lr_0 = 3.2211e-04
Loss = 6.3064e-03, PNorm = 148.7032, GNorm = 0.3515, lr_0 = 3.2189e-04
Loss = 5.4687e-03, PNorm = 148.7163, GNorm = 0.3436, lr_0 = 3.2167e-04
Loss = 1.0541e-02, PNorm = 148.7315, GNorm = 0.1716, lr_0 = 3.2145e-04
Loss = 5.5588e-03, PNorm = 148.7440, GNorm = 0.1801, lr_0 = 3.2123e-04
Loss = 5.3717e-03, PNorm = 148.7571, GNorm = 0.3673, lr_0 = 3.2101e-04
Loss = 6.0670e-03, PNorm = 148.7690, GNorm = 0.1682, lr_0 = 3.2079e-04
Loss = 5.3431e-03, PNorm = 148.7841, GNorm = 0.1202, lr_0 = 3.2057e-04
Loss = 4.5533e-03, PNorm = 148.7958, GNorm = 0.3346, lr_0 = 3.2035e-04
Loss = 5.4758e-03, PNorm = 148.8080, GNorm = 0.4921, lr_0 = 3.2013e-04
Loss = 5.4657e-03, PNorm = 148.8170, GNorm = 0.2061, lr_0 = 3.1991e-04
Loss = 4.4618e-03, PNorm = 148.8260, GNorm = 0.1056, lr_0 = 3.1969e-04
Loss = 4.7633e-03, PNorm = 148.8362, GNorm = 0.1487, lr_0 = 3.1947e-04
Loss = 4.4208e-03, PNorm = 148.8457, GNorm = 0.1200, lr_0 = 3.1925e-04
Loss = 4.6008e-03, PNorm = 148.8590, GNorm = 0.1050, lr_0 = 3.1904e-04
Loss = 4.8600e-03, PNorm = 148.8772, GNorm = 0.0875, lr_0 = 3.1882e-04
Loss = 4.7909e-03, PNorm = 148.8906, GNorm = 0.2939, lr_0 = 3.1860e-04
Loss = 4.5559e-03, PNorm = 148.9026, GNorm = 0.2590, lr_0 = 3.1838e-04
Loss = 4.0261e-03, PNorm = 148.9128, GNorm = 0.1178, lr_0 = 3.1816e-04
Loss = 4.7767e-03, PNorm = 148.9201, GNorm = 0.1448, lr_0 = 3.1794e-04
Loss = 4.5102e-03, PNorm = 148.9329, GNorm = 0.3236, lr_0 = 3.1773e-04
Loss = 6.3241e-03, PNorm = 148.9441, GNorm = 0.2035, lr_0 = 3.1751e-04
Loss = 5.1048e-03, PNorm = 148.9553, GNorm = 0.3523, lr_0 = 3.1729e-04
Loss = 5.2716e-03, PNorm = 148.9694, GNorm = 0.2258, lr_0 = 3.1707e-04
Loss = 4.9842e-03, PNorm = 148.9853, GNorm = 0.2341, lr_0 = 3.1686e-04
Loss = 5.8141e-03, PNorm = 148.9995, GNorm = 0.2683, lr_0 = 3.1664e-04
Loss = 6.8488e-03, PNorm = 149.0164, GNorm = 0.5124, lr_0 = 3.1642e-04
Loss = 5.5437e-03, PNorm = 149.0304, GNorm = 0.1774, lr_0 = 3.1621e-04
Validation mae = 0.477635
Epoch 16
Loss = 4.8208e-03, PNorm = 149.0403, GNorm = 0.5525, lr_0 = 3.1599e-04
Loss = 4.9239e-03, PNorm = 149.0467, GNorm = 0.2072, lr_0 = 3.1577e-04
Loss = 4.3195e-03, PNorm = 149.0564, GNorm = 0.1600, lr_0 = 3.1556e-04
Loss = 3.4938e-03, PNorm = 149.0641, GNorm = 0.1849, lr_0 = 3.1534e-04
Loss = 4.1350e-03, PNorm = 149.0720, GNorm = 0.0871, lr_0 = 3.1512e-04
Loss = 5.5369e-03, PNorm = 149.0781, GNorm = 0.3096, lr_0 = 3.1491e-04
Loss = 3.7940e-03, PNorm = 149.0853, GNorm = 0.4269, lr_0 = 3.1469e-04
Loss = 5.0269e-03, PNorm = 149.0924, GNorm = 0.2000, lr_0 = 3.1448e-04
Loss = 4.7023e-03, PNorm = 149.1022, GNorm = 0.2322, lr_0 = 3.1426e-04
Loss = 8.3245e-03, PNorm = 149.1129, GNorm = 0.1879, lr_0 = 3.1405e-04
Loss = 4.1354e-03, PNorm = 149.1228, GNorm = 0.0850, lr_0 = 3.1383e-04
Loss = 4.0498e-03, PNorm = 149.1284, GNorm = 0.2012, lr_0 = 3.1362e-04
Loss = 5.8315e-03, PNorm = 149.1397, GNorm = 0.1853, lr_0 = 3.1340e-04
Loss = 4.4381e-03, PNorm = 149.1489, GNorm = 0.1342, lr_0 = 3.1319e-04
Loss = 3.9573e-03, PNorm = 149.1568, GNorm = 0.0693, lr_0 = 3.1297e-04
Loss = 3.9762e-03, PNorm = 149.1657, GNorm = 0.0633, lr_0 = 3.1276e-04
Loss = 3.9450e-03, PNorm = 149.1754, GNorm = 0.3088, lr_0 = 3.1254e-04
Loss = 4.1703e-03, PNorm = 149.1823, GNorm = 0.1117, lr_0 = 3.1233e-04
Loss = 4.6495e-03, PNorm = 149.1916, GNorm = 0.2867, lr_0 = 3.1212e-04
Loss = 5.6481e-03, PNorm = 149.1995, GNorm = 0.0727, lr_0 = 3.1190e-04
Loss = 7.1631e-03, PNorm = 149.2060, GNorm = 0.0710, lr_0 = 3.1169e-04
Loss = 4.5118e-03, PNorm = 149.2179, GNorm = 0.1040, lr_0 = 3.1147e-04
Loss = 6.9114e-03, PNorm = 149.2236, GNorm = 0.5558, lr_0 = 3.1126e-04
Loss = 4.1623e-03, PNorm = 149.2327, GNorm = 0.0563, lr_0 = 3.1105e-04
Loss = 3.8534e-03, PNorm = 149.2409, GNorm = 0.1421, lr_0 = 3.1083e-04
Loss = 3.9756e-03, PNorm = 149.2507, GNorm = 0.1023, lr_0 = 3.1062e-04
Loss = 3.5533e-03, PNorm = 149.2609, GNorm = 0.1820, lr_0 = 3.1041e-04
Loss = 5.0784e-03, PNorm = 149.2686, GNorm = 0.1829, lr_0 = 3.1020e-04
Loss = 5.0726e-03, PNorm = 149.2757, GNorm = 0.2864, lr_0 = 3.0998e-04
Loss = 3.4818e-03, PNorm = 149.2849, GNorm = 0.2055, lr_0 = 3.0977e-04
Loss = 4.0125e-03, PNorm = 149.2951, GNorm = 0.2879, lr_0 = 3.0956e-04
Loss = 3.3326e-03, PNorm = 149.3050, GNorm = 0.0675, lr_0 = 3.0935e-04
Loss = 5.9459e-03, PNorm = 149.3185, GNorm = 0.1291, lr_0 = 3.0914e-04
Loss = 4.0017e-03, PNorm = 149.3314, GNorm = 0.1031, lr_0 = 3.0892e-04
Loss = 4.6490e-03, PNorm = 149.3411, GNorm = 0.0727, lr_0 = 3.0871e-04
Loss = 4.6313e-03, PNorm = 149.3502, GNorm = 0.0678, lr_0 = 3.0850e-04
Loss = 6.5132e-03, PNorm = 149.3602, GNorm = 0.4810, lr_0 = 3.0829e-04
Loss = 4.2237e-03, PNorm = 149.3695, GNorm = 0.1991, lr_0 = 3.0808e-04
Loss = 4.1452e-03, PNorm = 149.3778, GNorm = 0.0795, lr_0 = 3.0787e-04
Loss = 3.9523e-03, PNorm = 149.3865, GNorm = 0.0769, lr_0 = 3.0766e-04
Loss = 5.0028e-03, PNorm = 149.3976, GNorm = 0.6343, lr_0 = 3.0745e-04
Loss = 5.5733e-03, PNorm = 149.4080, GNorm = 0.2457, lr_0 = 3.0723e-04
Loss = 3.6158e-03, PNorm = 149.4175, GNorm = 0.2747, lr_0 = 3.0702e-04
Loss = 3.7107e-03, PNorm = 149.4282, GNorm = 0.0968, lr_0 = 3.0681e-04
Loss = 3.6918e-03, PNorm = 149.4374, GNorm = 0.3417, lr_0 = 3.0660e-04
Loss = 3.7507e-03, PNorm = 149.4472, GNorm = 0.0980, lr_0 = 3.0639e-04
Loss = 5.1840e-03, PNorm = 149.4570, GNorm = 0.3193, lr_0 = 3.0618e-04
Loss = 4.0381e-03, PNorm = 149.4639, GNorm = 0.2585, lr_0 = 3.0597e-04
Loss = 6.0057e-03, PNorm = 149.4742, GNorm = 0.2594, lr_0 = 3.0576e-04
Loss = 5.2854e-03, PNorm = 149.4844, GNorm = 0.0593, lr_0 = 3.0555e-04
Loss = 6.1159e-03, PNorm = 149.4933, GNorm = 0.2295, lr_0 = 3.0535e-04
Loss = 3.8545e-03, PNorm = 149.5031, GNorm = 0.1066, lr_0 = 3.0514e-04
Loss = 4.3266e-03, PNorm = 149.5107, GNorm = 0.2187, lr_0 = 3.0493e-04
Loss = 3.7215e-03, PNorm = 149.5187, GNorm = 0.0969, lr_0 = 3.0472e-04
Loss = 4.4121e-03, PNorm = 149.5301, GNorm = 0.1264, lr_0 = 3.0451e-04
Loss = 5.1950e-03, PNorm = 149.5378, GNorm = 0.0943, lr_0 = 3.0430e-04
Loss = 3.8228e-03, PNorm = 149.5475, GNorm = 0.2071, lr_0 = 3.0409e-04
Loss = 3.9803e-03, PNorm = 149.5548, GNorm = 0.5868, lr_0 = 3.0388e-04
Loss = 4.9410e-03, PNorm = 149.5580, GNorm = 0.4803, lr_0 = 3.0368e-04
Loss = 4.8590e-03, PNorm = 149.5654, GNorm = 0.1652, lr_0 = 3.0347e-04
Loss = 4.3281e-03, PNorm = 149.5701, GNorm = 0.0939, lr_0 = 3.0326e-04
Loss = 5.8611e-03, PNorm = 149.5818, GNorm = 0.1645, lr_0 = 3.0305e-04
Loss = 4.2109e-03, PNorm = 149.5898, GNorm = 0.1691, lr_0 = 3.0284e-04
Loss = 4.8405e-03, PNorm = 149.6039, GNorm = 0.2306, lr_0 = 3.0264e-04
Loss = 4.0085e-03, PNorm = 149.6184, GNorm = 0.0880, lr_0 = 3.0243e-04
Loss = 4.2283e-03, PNorm = 149.6296, GNorm = 0.1804, lr_0 = 3.0222e-04
Loss = 3.8570e-03, PNorm = 149.6391, GNorm = 0.1685, lr_0 = 3.0202e-04
Loss = 3.3230e-03, PNorm = 149.6488, GNorm = 0.0966, lr_0 = 3.0181e-04
Loss = 4.7282e-03, PNorm = 149.6581, GNorm = 0.0764, lr_0 = 3.0160e-04
Loss = 3.9064e-03, PNorm = 149.6627, GNorm = 0.1756, lr_0 = 3.0140e-04
Loss = 6.9735e-03, PNorm = 149.6713, GNorm = 0.3465, lr_0 = 3.0119e-04
Loss = 3.6549e-03, PNorm = 149.6790, GNorm = 0.1629, lr_0 = 3.0098e-04
Loss = 4.4995e-03, PNorm = 149.6890, GNorm = 0.1650, lr_0 = 3.0078e-04
Loss = 4.5499e-03, PNorm = 149.7000, GNorm = 0.1480, lr_0 = 3.0057e-04
Loss = 3.2639e-03, PNorm = 149.7117, GNorm = 0.0950, lr_0 = 3.0036e-04
Loss = 5.2222e-03, PNorm = 149.7252, GNorm = 0.1566, lr_0 = 3.0016e-04
Loss = 4.7916e-03, PNorm = 149.7342, GNorm = 0.3951, lr_0 = 2.9995e-04
Loss = 4.6917e-03, PNorm = 149.7449, GNorm = 0.1509, lr_0 = 2.9975e-04
Loss = 5.8336e-03, PNorm = 149.7585, GNorm = 0.3310, lr_0 = 2.9954e-04
Loss = 3.5323e-03, PNorm = 149.7706, GNorm = 0.1808, lr_0 = 2.9934e-04
Loss = 3.4219e-03, PNorm = 149.7822, GNorm = 0.0858, lr_0 = 2.9913e-04
Loss = 3.9948e-03, PNorm = 149.7917, GNorm = 0.1466, lr_0 = 2.9893e-04
Loss = 3.8188e-03, PNorm = 149.7973, GNorm = 0.1455, lr_0 = 2.9872e-04
Loss = 3.1482e-03, PNorm = 149.8020, GNorm = 0.2224, lr_0 = 2.9852e-04
Loss = 4.7677e-03, PNorm = 149.8077, GNorm = 0.0860, lr_0 = 2.9831e-04
Loss = 4.0981e-03, PNorm = 149.8182, GNorm = 0.1733, lr_0 = 2.9811e-04
Loss = 4.9518e-03, PNorm = 149.8268, GNorm = 0.2998, lr_0 = 2.9790e-04
Loss = 3.8046e-03, PNorm = 149.8354, GNorm = 0.2083, lr_0 = 2.9770e-04
Loss = 5.0725e-03, PNorm = 149.8462, GNorm = 0.1804, lr_0 = 2.9750e-04
Loss = 4.3399e-03, PNorm = 149.8573, GNorm = 0.3827, lr_0 = 2.9729e-04
Loss = 3.8886e-03, PNorm = 149.8703, GNorm = 0.2296, lr_0 = 2.9709e-04
Loss = 4.5300e-03, PNorm = 149.8825, GNorm = 0.1931, lr_0 = 2.9689e-04
Loss = 4.3294e-03, PNorm = 149.8928, GNorm = 0.1033, lr_0 = 2.9668e-04
Loss = 3.3220e-03, PNorm = 149.9033, GNorm = 0.1755, lr_0 = 2.9648e-04
Loss = 5.5795e-03, PNorm = 149.9111, GNorm = 0.2854, lr_0 = 2.9628e-04
Loss = 6.0616e-03, PNorm = 149.9214, GNorm = 0.7045, lr_0 = 2.9607e-04
Loss = 5.7263e-03, PNorm = 149.9311, GNorm = 0.4032, lr_0 = 2.9587e-04
Loss = 6.1798e-03, PNorm = 149.9410, GNorm = 0.2221, lr_0 = 2.9567e-04
Loss = 4.9676e-03, PNorm = 149.9494, GNorm = 0.1422, lr_0 = 2.9546e-04
Loss = 4.5273e-03, PNorm = 149.9589, GNorm = 0.1933, lr_0 = 2.9526e-04
Loss = 3.8434e-03, PNorm = 149.9731, GNorm = 0.1268, lr_0 = 2.9506e-04
Loss = 4.2010e-03, PNorm = 149.9825, GNorm = 0.2285, lr_0 = 2.9486e-04
Loss = 4.2104e-03, PNorm = 149.9883, GNorm = 0.2139, lr_0 = 2.9466e-04
Loss = 4.1868e-03, PNorm = 150.0035, GNorm = 0.2462, lr_0 = 2.9445e-04
Loss = 3.4024e-03, PNorm = 150.0183, GNorm = 0.1418, lr_0 = 2.9425e-04
Loss = 3.7993e-03, PNorm = 150.0322, GNorm = 0.0890, lr_0 = 2.9405e-04
Loss = 5.8870e-03, PNorm = 150.0383, GNorm = 0.1635, lr_0 = 2.9385e-04
Loss = 3.5537e-03, PNorm = 150.0454, GNorm = 0.2179, lr_0 = 2.9365e-04
Loss = 5.6129e-03, PNorm = 150.0552, GNorm = 0.3524, lr_0 = 2.9345e-04
Loss = 3.8948e-03, PNorm = 150.0629, GNorm = 0.0602, lr_0 = 2.9325e-04
Loss = 4.6692e-03, PNorm = 150.0698, GNorm = 0.1237, lr_0 = 2.9305e-04
Loss = 4.5334e-03, PNorm = 150.0805, GNorm = 0.1698, lr_0 = 2.9284e-04
Loss = 6.9179e-03, PNorm = 150.0886, GNorm = 0.7090, lr_0 = 2.9264e-04
Loss = 4.9248e-03, PNorm = 150.0997, GNorm = 0.2580, lr_0 = 2.9244e-04
Loss = 6.0392e-03, PNorm = 150.1108, GNorm = 0.0942, lr_0 = 2.9224e-04
Loss = 6.2112e-03, PNorm = 150.1224, GNorm = 0.1424, lr_0 = 2.9204e-04
Loss = 5.0233e-03, PNorm = 150.1317, GNorm = 0.4462, lr_0 = 2.9184e-04
Loss = 5.5152e-03, PNorm = 150.1420, GNorm = 0.2745, lr_0 = 2.9164e-04
Loss = 4.3119e-03, PNorm = 150.1556, GNorm = 0.2407, lr_0 = 2.9144e-04
Loss = 8.0969e-03, PNorm = 150.1678, GNorm = 0.0853, lr_0 = 2.9124e-04
Validation mae = 0.477783
Epoch 17
Loss = 5.1507e-03, PNorm = 150.1776, GNorm = 0.2759, lr_0 = 2.9104e-04
Loss = 4.1572e-03, PNorm = 150.1827, GNorm = 0.0936, lr_0 = 2.9084e-04
Loss = 4.6021e-03, PNorm = 150.1882, GNorm = 0.3026, lr_0 = 2.9065e-04
Loss = 3.6615e-03, PNorm = 150.1979, GNorm = 0.0593, lr_0 = 2.9045e-04
Loss = 4.0657e-03, PNorm = 150.2072, GNorm = 0.0931, lr_0 = 2.9025e-04
Loss = 5.1029e-03, PNorm = 150.2187, GNorm = 0.2503, lr_0 = 2.9005e-04
Loss = 3.2714e-03, PNorm = 150.2288, GNorm = 0.0635, lr_0 = 2.8985e-04
Loss = 7.5829e-03, PNorm = 150.2324, GNorm = 0.2096, lr_0 = 2.8965e-04
Loss = 6.4960e-03, PNorm = 150.2394, GNorm = 0.3100, lr_0 = 2.8945e-04
Loss = 3.4651e-03, PNorm = 150.2480, GNorm = 0.2267, lr_0 = 2.8925e-04
Loss = 3.9263e-03, PNorm = 150.2553, GNorm = 0.1383, lr_0 = 2.8906e-04
Loss = 4.6003e-03, PNorm = 150.2622, GNorm = 0.1712, lr_0 = 2.8886e-04
Loss = 3.4184e-03, PNorm = 150.2707, GNorm = 0.2984, lr_0 = 2.8866e-04
Loss = 3.8850e-03, PNorm = 150.2772, GNorm = 0.1660, lr_0 = 2.8846e-04
Loss = 3.2292e-03, PNorm = 150.2869, GNorm = 0.1446, lr_0 = 2.8826e-04
Loss = 3.9679e-03, PNorm = 150.2992, GNorm = 0.1667, lr_0 = 2.8807e-04
Loss = 3.1594e-03, PNorm = 150.3077, GNorm = 0.0543, lr_0 = 2.8787e-04
Loss = 3.7584e-03, PNorm = 150.3153, GNorm = 0.1634, lr_0 = 2.8767e-04
Loss = 3.4679e-03, PNorm = 150.3221, GNorm = 0.1981, lr_0 = 2.8748e-04
Loss = 4.0353e-03, PNorm = 150.3284, GNorm = 0.2343, lr_0 = 2.8728e-04
Loss = 3.1912e-03, PNorm = 150.3358, GNorm = 0.1309, lr_0 = 2.8708e-04
Loss = 3.2321e-03, PNorm = 150.3424, GNorm = 0.1737, lr_0 = 2.8689e-04
Loss = 3.6907e-03, PNorm = 150.3527, GNorm = 0.1603, lr_0 = 2.8669e-04
Loss = 4.5524e-03, PNorm = 150.3598, GNorm = 0.1653, lr_0 = 2.8649e-04
Loss = 3.6948e-03, PNorm = 150.3692, GNorm = 0.2091, lr_0 = 2.8630e-04
Loss = 3.6750e-03, PNorm = 150.3762, GNorm = 0.1314, lr_0 = 2.8610e-04
Loss = 3.9388e-03, PNorm = 150.3840, GNorm = 0.2231, lr_0 = 2.8590e-04
Loss = 3.9242e-03, PNorm = 150.3934, GNorm = 0.2656, lr_0 = 2.8571e-04
Loss = 4.0394e-03, PNorm = 150.4029, GNorm = 0.1829, lr_0 = 2.8551e-04
Loss = 2.7675e-03, PNorm = 150.4118, GNorm = 0.0945, lr_0 = 2.8532e-04
Loss = 4.7447e-03, PNorm = 150.4184, GNorm = 0.3456, lr_0 = 2.8512e-04
Loss = 3.2433e-03, PNorm = 150.4229, GNorm = 0.2277, lr_0 = 2.8493e-04
Loss = 3.5824e-03, PNorm = 150.4271, GNorm = 0.2515, lr_0 = 2.8473e-04
Loss = 3.7527e-03, PNorm = 150.4339, GNorm = 0.1025, lr_0 = 2.8454e-04
Loss = 3.5222e-03, PNorm = 150.4406, GNorm = 0.0616, lr_0 = 2.8434e-04
Loss = 4.0991e-03, PNorm = 150.4471, GNorm = 0.0581, lr_0 = 2.8415e-04
Loss = 3.3517e-03, PNorm = 150.4574, GNorm = 0.1610, lr_0 = 2.8395e-04
Loss = 4.8132e-03, PNorm = 150.4640, GNorm = 0.1867, lr_0 = 2.8376e-04
Loss = 4.9293e-03, PNorm = 150.4729, GNorm = 0.3300, lr_0 = 2.8356e-04
Loss = 3.1988e-03, PNorm = 150.4793, GNorm = 0.2881, lr_0 = 2.8337e-04
Loss = 3.8684e-03, PNorm = 150.4866, GNorm = 0.2508, lr_0 = 2.8317e-04
Loss = 4.1730e-03, PNorm = 150.4957, GNorm = 0.2854, lr_0 = 2.8298e-04
Loss = 3.0701e-03, PNorm = 150.5035, GNorm = 0.3071, lr_0 = 2.8279e-04
Loss = 5.0870e-03, PNorm = 150.5087, GNorm = 0.0686, lr_0 = 2.8259e-04
Loss = 5.8069e-03, PNorm = 150.5130, GNorm = 0.0867, lr_0 = 2.8240e-04
Loss = 4.3686e-03, PNorm = 150.5239, GNorm = 0.3259, lr_0 = 2.8221e-04
Loss = 2.9708e-03, PNorm = 150.5312, GNorm = 0.0455, lr_0 = 2.8201e-04
Loss = 3.6925e-03, PNorm = 150.5408, GNorm = 0.2786, lr_0 = 2.8182e-04
Loss = 4.7159e-03, PNorm = 150.5468, GNorm = 0.2037, lr_0 = 2.8163e-04
Loss = 3.6418e-03, PNorm = 150.5549, GNorm = 0.1239, lr_0 = 2.8143e-04
Loss = 3.9368e-03, PNorm = 150.5640, GNorm = 0.1646, lr_0 = 2.8124e-04
Loss = 3.1910e-03, PNorm = 150.5738, GNorm = 0.2397, lr_0 = 2.8105e-04
Loss = 4.4633e-03, PNorm = 150.5848, GNorm = 0.1404, lr_0 = 2.8085e-04
Loss = 6.4075e-03, PNorm = 150.5944, GNorm = 0.1496, lr_0 = 2.8066e-04
Loss = 4.5925e-03, PNorm = 150.6040, GNorm = 0.1833, lr_0 = 2.8047e-04
Loss = 3.5349e-03, PNorm = 150.6106, GNorm = 0.1675, lr_0 = 2.8028e-04
Loss = 4.2761e-03, PNorm = 150.6214, GNorm = 0.1793, lr_0 = 2.8009e-04
Loss = 3.5937e-03, PNorm = 150.6326, GNorm = 0.1119, lr_0 = 2.7989e-04
Loss = 3.5629e-03, PNorm = 150.6394, GNorm = 0.1667, lr_0 = 2.7970e-04
Loss = 3.2142e-03, PNorm = 150.6461, GNorm = 0.0733, lr_0 = 2.7951e-04
Loss = 3.0552e-03, PNorm = 150.6512, GNorm = 0.0910, lr_0 = 2.7932e-04
Loss = 3.4332e-03, PNorm = 150.6566, GNorm = 0.2005, lr_0 = 2.7913e-04
Loss = 3.4080e-03, PNorm = 150.6643, GNorm = 0.1590, lr_0 = 2.7894e-04
Loss = 4.5142e-03, PNorm = 150.6696, GNorm = 0.0976, lr_0 = 2.7875e-04
Loss = 3.6685e-03, PNorm = 150.6755, GNorm = 0.0935, lr_0 = 2.7855e-04
Loss = 4.4371e-03, PNorm = 150.6840, GNorm = 0.2372, lr_0 = 2.7836e-04
Loss = 3.7820e-03, PNorm = 150.6959, GNorm = 0.2456, lr_0 = 2.7817e-04
Loss = 3.2848e-03, PNorm = 150.7035, GNorm = 0.2327, lr_0 = 2.7798e-04
Loss = 4.3041e-03, PNorm = 150.7126, GNorm = 0.1964, lr_0 = 2.7779e-04
Loss = 4.6386e-03, PNorm = 150.7234, GNorm = 0.1176, lr_0 = 2.7760e-04
Loss = 3.5252e-03, PNorm = 150.7299, GNorm = 0.3471, lr_0 = 2.7741e-04
Loss = 4.8064e-03, PNorm = 150.7393, GNorm = 0.0928, lr_0 = 2.7722e-04
Loss = 3.0782e-03, PNorm = 150.7469, GNorm = 0.1061, lr_0 = 2.7703e-04
Loss = 3.9285e-03, PNorm = 150.7573, GNorm = 0.1871, lr_0 = 2.7684e-04
Loss = 3.5892e-03, PNorm = 150.7662, GNorm = 0.0766, lr_0 = 2.7665e-04
Loss = 5.8355e-03, PNorm = 150.7751, GNorm = 0.2148, lr_0 = 2.7646e-04
Loss = 3.6138e-03, PNorm = 150.7850, GNorm = 0.0789, lr_0 = 2.7627e-04
Loss = 4.3614e-03, PNorm = 150.7913, GNorm = 0.1318, lr_0 = 2.7608e-04
Loss = 4.1789e-03, PNorm = 150.7999, GNorm = 0.2575, lr_0 = 2.7590e-04
Loss = 3.7177e-03, PNorm = 150.8079, GNorm = 0.1425, lr_0 = 2.7571e-04
Loss = 3.2859e-03, PNorm = 150.8169, GNorm = 0.1593, lr_0 = 2.7552e-04
Loss = 5.2725e-03, PNorm = 150.8223, GNorm = 0.0665, lr_0 = 2.7533e-04
Loss = 4.0503e-03, PNorm = 150.8280, GNorm = 0.2448, lr_0 = 2.7514e-04
Loss = 3.6062e-03, PNorm = 150.8365, GNorm = 0.2127, lr_0 = 2.7495e-04
Loss = 4.0555e-03, PNorm = 150.8481, GNorm = 0.1239, lr_0 = 2.7476e-04
Loss = 3.7406e-03, PNorm = 150.8605, GNorm = 0.3338, lr_0 = 2.7457e-04
Loss = 4.6075e-03, PNorm = 150.8727, GNorm = 0.1698, lr_0 = 2.7439e-04
Loss = 4.1847e-03, PNorm = 150.8823, GNorm = 0.1798, lr_0 = 2.7420e-04
Loss = 5.2394e-03, PNorm = 150.8894, GNorm = 0.1384, lr_0 = 2.7401e-04
Loss = 4.2378e-03, PNorm = 150.8960, GNorm = 0.1669, lr_0 = 2.7382e-04
Loss = 4.0744e-03, PNorm = 150.9052, GNorm = 0.1380, lr_0 = 2.7364e-04
Loss = 5.6238e-03, PNorm = 150.9135, GNorm = 0.0783, lr_0 = 2.7345e-04
Loss = 3.2926e-03, PNorm = 150.9217, GNorm = 0.1728, lr_0 = 2.7326e-04
Loss = 4.9238e-03, PNorm = 150.9284, GNorm = 0.1490, lr_0 = 2.7307e-04
Loss = 4.6309e-03, PNorm = 150.9347, GNorm = 0.2005, lr_0 = 2.7289e-04
Loss = 3.6883e-03, PNorm = 150.9436, GNorm = 0.1488, lr_0 = 2.7270e-04
Loss = 3.1535e-03, PNorm = 150.9539, GNorm = 0.1804, lr_0 = 2.7251e-04
Loss = 3.9818e-03, PNorm = 150.9619, GNorm = 0.3731, lr_0 = 2.7233e-04
Loss = 3.6778e-03, PNorm = 150.9713, GNorm = 0.1132, lr_0 = 2.7214e-04
Loss = 3.0360e-03, PNorm = 150.9783, GNorm = 0.1260, lr_0 = 2.7195e-04
Loss = 3.8704e-03, PNorm = 150.9890, GNorm = 0.2285, lr_0 = 2.7177e-04
Loss = 3.3027e-03, PNorm = 150.9978, GNorm = 0.1855, lr_0 = 2.7158e-04
Loss = 4.7636e-03, PNorm = 151.0057, GNorm = 0.3434, lr_0 = 2.7139e-04
Loss = 4.1997e-03, PNorm = 151.0141, GNorm = 0.1351, lr_0 = 2.7121e-04
Loss = 3.5866e-03, PNorm = 151.0211, GNorm = 0.1121, lr_0 = 2.7102e-04
Loss = 3.9884e-03, PNorm = 151.0290, GNorm = 0.2660, lr_0 = 2.7084e-04
Loss = 5.1618e-03, PNorm = 151.0354, GNorm = 0.1278, lr_0 = 2.7065e-04
Loss = 6.5089e-03, PNorm = 151.0401, GNorm = 0.1026, lr_0 = 2.7047e-04
Loss = 6.3944e-03, PNorm = 151.0490, GNorm = 0.2423, lr_0 = 2.7028e-04
Loss = 3.7672e-03, PNorm = 151.0538, GNorm = 0.1595, lr_0 = 2.7010e-04
Loss = 3.7763e-03, PNorm = 151.0635, GNorm = 0.1379, lr_0 = 2.6991e-04
Loss = 3.6246e-03, PNorm = 151.0723, GNorm = 0.1672, lr_0 = 2.6973e-04
Loss = 3.2739e-03, PNorm = 151.0782, GNorm = 0.0946, lr_0 = 2.6954e-04
Loss = 3.1062e-03, PNorm = 151.0844, GNorm = 0.0895, lr_0 = 2.6936e-04
Loss = 4.0717e-03, PNorm = 151.0916, GNorm = 0.1884, lr_0 = 2.6917e-04
Loss = 5.1440e-03, PNorm = 151.0981, GNorm = 0.1427, lr_0 = 2.6899e-04
Loss = 6.3501e-03, PNorm = 151.1081, GNorm = 0.3558, lr_0 = 2.6880e-04
Loss = 5.5841e-03, PNorm = 151.1187, GNorm = 0.0967, lr_0 = 2.6862e-04
Loss = 3.4255e-03, PNorm = 151.1293, GNorm = 0.2643, lr_0 = 2.6844e-04
Loss = 3.4015e-03, PNorm = 151.1392, GNorm = 0.1621, lr_0 = 2.6825e-04
Validation mae = 0.476413
Epoch 18
Loss = 6.5642e-03, PNorm = 151.1439, GNorm = 0.0969, lr_0 = 2.6807e-04
Loss = 3.8680e-03, PNorm = 151.1497, GNorm = 0.0805, lr_0 = 2.6788e-04
Loss = 3.2834e-03, PNorm = 151.1585, GNorm = 0.0924, lr_0 = 2.6770e-04
Loss = 3.1887e-03, PNorm = 151.1656, GNorm = 0.1095, lr_0 = 2.6752e-04
Loss = 4.2545e-03, PNorm = 151.1707, GNorm = 0.1381, lr_0 = 2.6733e-04
Loss = 4.0265e-03, PNorm = 151.1773, GNorm = 0.1574, lr_0 = 2.6715e-04
Loss = 2.9954e-03, PNorm = 151.1846, GNorm = 0.1653, lr_0 = 2.6697e-04
Loss = 3.7332e-03, PNorm = 151.1898, GNorm = 0.2829, lr_0 = 2.6678e-04
Loss = 3.5359e-03, PNorm = 151.2001, GNorm = 0.0829, lr_0 = 2.6660e-04
Loss = 5.2098e-03, PNorm = 151.2064, GNorm = 0.1065, lr_0 = 2.6642e-04
Loss = 3.1441e-03, PNorm = 151.2106, GNorm = 0.0573, lr_0 = 2.6624e-04
Loss = 4.5279e-03, PNorm = 151.2135, GNorm = 0.2740, lr_0 = 2.6605e-04
Loss = 3.3023e-03, PNorm = 151.2206, GNorm = 0.1952, lr_0 = 2.6587e-04
Loss = 3.7709e-03, PNorm = 151.2288, GNorm = 0.1122, lr_0 = 2.6569e-04
Loss = 2.5975e-03, PNorm = 151.2367, GNorm = 0.1238, lr_0 = 2.6551e-04
Loss = 2.6244e-03, PNorm = 151.2438, GNorm = 0.1164, lr_0 = 2.6533e-04
Loss = 3.4716e-03, PNorm = 151.2476, GNorm = 0.2424, lr_0 = 2.6514e-04
Loss = 3.4149e-03, PNorm = 151.2501, GNorm = 0.1502, lr_0 = 2.6496e-04
Loss = 2.6343e-03, PNorm = 151.2573, GNorm = 0.0648, lr_0 = 2.6478e-04
Loss = 2.7671e-03, PNorm = 151.2644, GNorm = 0.0864, lr_0 = 2.6460e-04
Loss = 3.6929e-03, PNorm = 151.2713, GNorm = 0.1327, lr_0 = 2.6442e-04
Loss = 4.5772e-03, PNorm = 151.2749, GNorm = 0.3782, lr_0 = 2.6424e-04
Loss = 2.8201e-03, PNorm = 151.2809, GNorm = 0.0482, lr_0 = 2.6406e-04
Loss = 2.7336e-03, PNorm = 151.2854, GNorm = 0.0854, lr_0 = 2.6388e-04
Loss = 3.4896e-03, PNorm = 151.2925, GNorm = 0.1627, lr_0 = 2.6369e-04
Loss = 3.5917e-03, PNorm = 151.3016, GNorm = 0.2433, lr_0 = 2.6351e-04
Loss = 3.2770e-03, PNorm = 151.3064, GNorm = 0.4000, lr_0 = 2.6333e-04
Loss = 3.0305e-03, PNorm = 151.3147, GNorm = 0.1675, lr_0 = 2.6315e-04
Loss = 3.7370e-03, PNorm = 151.3225, GNorm = 0.2023, lr_0 = 2.6297e-04
Loss = 3.1686e-03, PNorm = 151.3269, GNorm = 0.1828, lr_0 = 2.6279e-04
Loss = 3.3617e-03, PNorm = 151.3332, GNorm = 0.0961, lr_0 = 2.6261e-04
Loss = 4.0487e-03, PNorm = 151.3379, GNorm = 0.0675, lr_0 = 2.6243e-04
Loss = 4.7378e-03, PNorm = 151.3442, GNorm = 0.1449, lr_0 = 2.6225e-04
Loss = 2.6825e-03, PNorm = 151.3499, GNorm = 0.1759, lr_0 = 2.6207e-04
Loss = 2.7813e-03, PNorm = 151.3586, GNorm = 0.1620, lr_0 = 2.6189e-04
Loss = 3.6818e-03, PNorm = 151.3666, GNorm = 0.1823, lr_0 = 2.6171e-04
Loss = 2.8678e-03, PNorm = 151.3711, GNorm = 0.3230, lr_0 = 2.6153e-04
Loss = 3.9191e-03, PNorm = 151.3772, GNorm = 0.1610, lr_0 = 2.6136e-04
Loss = 3.1789e-03, PNorm = 151.3855, GNorm = 0.2918, lr_0 = 2.6118e-04
Loss = 2.8393e-03, PNorm = 151.3926, GNorm = 0.1255, lr_0 = 2.6100e-04
Loss = 3.4785e-03, PNorm = 151.4004, GNorm = 0.0966, lr_0 = 2.6082e-04
Loss = 2.4742e-03, PNorm = 151.4062, GNorm = 0.2661, lr_0 = 2.6064e-04
Loss = 2.5104e-03, PNorm = 151.4120, GNorm = 0.2550, lr_0 = 2.6046e-04
Loss = 3.1877e-03, PNorm = 151.4180, GNorm = 0.2024, lr_0 = 2.6028e-04
Loss = 3.5358e-03, PNorm = 151.4256, GNorm = 0.1777, lr_0 = 2.6011e-04
Loss = 2.4580e-03, PNorm = 151.4335, GNorm = 0.0838, lr_0 = 2.5993e-04
Loss = 2.8113e-03, PNorm = 151.4404, GNorm = 0.1043, lr_0 = 2.5975e-04
Loss = 3.4142e-03, PNorm = 151.4502, GNorm = 0.1468, lr_0 = 2.5957e-04
Loss = 3.6266e-03, PNorm = 151.4544, GNorm = 0.2487, lr_0 = 2.5939e-04
Loss = 3.2586e-03, PNorm = 151.4605, GNorm = 0.2477, lr_0 = 2.5922e-04
Loss = 2.7523e-03, PNorm = 151.4630, GNorm = 0.2673, lr_0 = 2.5904e-04
Loss = 2.9751e-03, PNorm = 151.4668, GNorm = 0.2119, lr_0 = 2.5886e-04
Loss = 3.5131e-03, PNorm = 151.4709, GNorm = 0.3167, lr_0 = 2.5868e-04
Loss = 4.0578e-03, PNorm = 151.4740, GNorm = 0.1013, lr_0 = 2.5851e-04
Loss = 4.5723e-03, PNorm = 151.4809, GNorm = 0.1086, lr_0 = 2.5833e-04
Loss = 3.7167e-03, PNorm = 151.4879, GNorm = 0.1912, lr_0 = 2.5815e-04
Loss = 3.4109e-03, PNorm = 151.4923, GNorm = 0.2897, lr_0 = 2.5797e-04
Loss = 3.8243e-03, PNorm = 151.5002, GNorm = 0.2230, lr_0 = 2.5780e-04
Loss = 2.5529e-03, PNorm = 151.5057, GNorm = 0.0637, lr_0 = 2.5762e-04
Loss = 2.7747e-03, PNorm = 151.5125, GNorm = 0.1455, lr_0 = 2.5745e-04
Loss = 4.3813e-03, PNorm = 151.5192, GNorm = 0.0914, lr_0 = 2.5727e-04
Loss = 2.7338e-03, PNorm = 151.5246, GNorm = 0.0749, lr_0 = 2.5709e-04
Loss = 3.0959e-03, PNorm = 151.5289, GNorm = 0.1559, lr_0 = 2.5692e-04
Loss = 2.5471e-03, PNorm = 151.5347, GNorm = 0.2101, lr_0 = 2.5674e-04
Loss = 2.5328e-03, PNorm = 151.5413, GNorm = 0.1124, lr_0 = 2.5656e-04
Loss = 3.1301e-03, PNorm = 151.5504, GNorm = 0.0931, lr_0 = 2.5639e-04
Loss = 2.6846e-03, PNorm = 151.5575, GNorm = 0.1367, lr_0 = 2.5621e-04
Loss = 2.8610e-03, PNorm = 151.5651, GNorm = 0.1196, lr_0 = 2.5604e-04
Loss = 4.3787e-03, PNorm = 151.5732, GNorm = 0.2926, lr_0 = 2.5586e-04
Loss = 2.3215e-03, PNorm = 151.5796, GNorm = 0.0675, lr_0 = 2.5569e-04
Loss = 2.2490e-03, PNorm = 151.5862, GNorm = 0.0824, lr_0 = 2.5551e-04
Loss = 2.3370e-03, PNorm = 151.5908, GNorm = 0.0876, lr_0 = 2.5534e-04
Loss = 3.5126e-03, PNorm = 151.5934, GNorm = 0.0842, lr_0 = 2.5516e-04
Loss = 4.8363e-03, PNorm = 151.5991, GNorm = 0.0905, lr_0 = 2.5499e-04
Loss = 2.9306e-03, PNorm = 151.6106, GNorm = 0.0809, lr_0 = 2.5481e-04
Loss = 6.1795e-03, PNorm = 151.6195, GNorm = 0.1888, lr_0 = 2.5464e-04
Loss = 2.7500e-03, PNorm = 151.6279, GNorm = 0.2509, lr_0 = 2.5446e-04
Loss = 3.1338e-03, PNorm = 151.6324, GNorm = 0.0993, lr_0 = 2.5429e-04
Loss = 2.3223e-03, PNorm = 151.6385, GNorm = 0.1833, lr_0 = 2.5411e-04
Loss = 2.4625e-03, PNorm = 151.6448, GNorm = 0.1854, lr_0 = 2.5394e-04
Loss = 2.5065e-03, PNorm = 151.6523, GNorm = 0.1772, lr_0 = 2.5377e-04
Loss = 2.7761e-03, PNorm = 151.6586, GNorm = 0.1351, lr_0 = 2.5359e-04
Loss = 2.5326e-03, PNorm = 151.6619, GNorm = 0.1434, lr_0 = 2.5342e-04
Loss = 2.5736e-03, PNorm = 151.6689, GNorm = 0.2691, lr_0 = 2.5325e-04
Loss = 3.1597e-03, PNorm = 151.6753, GNorm = 0.2030, lr_0 = 2.5307e-04
Loss = 3.5970e-03, PNorm = 151.6815, GNorm = 0.0774, lr_0 = 2.5290e-04
Loss = 3.5737e-03, PNorm = 151.6858, GNorm = 0.1968, lr_0 = 2.5273e-04
Loss = 2.9219e-03, PNorm = 151.6918, GNorm = 0.4102, lr_0 = 2.5255e-04
Loss = 3.5007e-03, PNorm = 151.6984, GNorm = 0.0430, lr_0 = 2.5238e-04
Loss = 3.0259e-03, PNorm = 151.7045, GNorm = 0.1791, lr_0 = 2.5221e-04
Loss = 4.3048e-03, PNorm = 151.7082, GNorm = 0.0950, lr_0 = 2.5203e-04
Loss = 2.9556e-03, PNorm = 151.7160, GNorm = 0.1125, lr_0 = 2.5186e-04
Loss = 3.6685e-03, PNorm = 151.7245, GNorm = 0.2196, lr_0 = 2.5169e-04
Loss = 3.1407e-03, PNorm = 151.7329, GNorm = 0.0904, lr_0 = 2.5152e-04
Loss = 2.9060e-03, PNorm = 151.7404, GNorm = 0.1955, lr_0 = 2.5134e-04
Loss = 3.6817e-03, PNorm = 151.7476, GNorm = 0.0803, lr_0 = 2.5117e-04
Loss = 5.6876e-03, PNorm = 151.7526, GNorm = 0.2599, lr_0 = 2.5100e-04
Loss = 3.4943e-03, PNorm = 151.7580, GNorm = 0.1847, lr_0 = 2.5083e-04
Loss = 3.1480e-03, PNorm = 151.7661, GNorm = 0.1040, lr_0 = 2.5066e-04
Loss = 3.4881e-03, PNorm = 151.7741, GNorm = 0.1775, lr_0 = 2.5048e-04
Loss = 3.3674e-03, PNorm = 151.7815, GNorm = 0.0759, lr_0 = 2.5031e-04
Loss = 5.2509e-03, PNorm = 151.7892, GNorm = 0.2301, lr_0 = 2.5014e-04
Loss = 4.0446e-03, PNorm = 151.7974, GNorm = 0.1455, lr_0 = 2.4997e-04
Loss = 2.3193e-03, PNorm = 151.8050, GNorm = 0.0840, lr_0 = 2.4980e-04
Loss = 3.0222e-03, PNorm = 151.8121, GNorm = 0.1210, lr_0 = 2.4963e-04
Loss = 4.4868e-03, PNorm = 151.8198, GNorm = 0.0551, lr_0 = 2.4946e-04
Loss = 2.8117e-03, PNorm = 151.8290, GNorm = 0.1769, lr_0 = 2.4929e-04
Loss = 3.6999e-03, PNorm = 151.8384, GNorm = 0.1404, lr_0 = 2.4911e-04
Loss = 5.0023e-03, PNorm = 151.8438, GNorm = 0.1571, lr_0 = 2.4894e-04
Loss = 3.5927e-03, PNorm = 151.8516, GNorm = 0.4059, lr_0 = 2.4877e-04
Loss = 3.2130e-03, PNorm = 151.8605, GNorm = 0.0704, lr_0 = 2.4860e-04
Loss = 3.0091e-03, PNorm = 151.8679, GNorm = 0.2806, lr_0 = 2.4843e-04
Loss = 4.2071e-03, PNorm = 151.8767, GNorm = 0.1997, lr_0 = 2.4826e-04
Loss = 6.0402e-03, PNorm = 151.8855, GNorm = 0.1494, lr_0 = 2.4809e-04
Loss = 3.4204e-03, PNorm = 151.8957, GNorm = 0.1835, lr_0 = 2.4792e-04
Loss = 2.8731e-03, PNorm = 151.9019, GNorm = 0.1340, lr_0 = 2.4775e-04
Loss = 4.8509e-03, PNorm = 151.9076, GNorm = 0.1435, lr_0 = 2.4758e-04
Loss = 3.3118e-03, PNorm = 151.9162, GNorm = 0.2073, lr_0 = 2.4741e-04
Loss = 3.1535e-03, PNorm = 151.9245, GNorm = 0.0565, lr_0 = 2.4724e-04
Loss = 3.0813e-03, PNorm = 151.9328, GNorm = 0.1414, lr_0 = 2.4707e-04
Validation mae = 0.477007
Epoch 19
Loss = 2.7429e-03, PNorm = 151.9379, GNorm = 0.1583, lr_0 = 2.4690e-04
Loss = 2.3688e-03, PNorm = 151.9383, GNorm = 0.1540, lr_0 = 2.4674e-04
Loss = 2.4278e-03, PNorm = 151.9425, GNorm = 0.1216, lr_0 = 2.4657e-04
Loss = 5.7898e-03, PNorm = 151.9437, GNorm = 0.1574, lr_0 = 2.4640e-04
Loss = 2.2387e-03, PNorm = 151.9499, GNorm = 0.1434, lr_0 = 2.4623e-04
Loss = 2.8040e-03, PNorm = 151.9565, GNorm = 0.0702, lr_0 = 2.4606e-04
Loss = 3.5035e-03, PNorm = 151.9614, GNorm = 0.1245, lr_0 = 2.4589e-04
Loss = 2.5832e-03, PNorm = 151.9681, GNorm = 0.1656, lr_0 = 2.4572e-04
Loss = 2.1589e-03, PNorm = 151.9749, GNorm = 0.1555, lr_0 = 2.4556e-04
Loss = 2.5909e-03, PNorm = 151.9809, GNorm = 0.1067, lr_0 = 2.4539e-04
Loss = 3.4972e-03, PNorm = 151.9866, GNorm = 0.1841, lr_0 = 2.4522e-04
Loss = 3.0300e-03, PNorm = 151.9924, GNorm = 0.1216, lr_0 = 2.4505e-04
Loss = 4.9605e-03, PNorm = 151.9998, GNorm = 0.0574, lr_0 = 2.4488e-04
Loss = 3.8583e-03, PNorm = 152.0072, GNorm = 0.0566, lr_0 = 2.4472e-04
Loss = 4.0980e-03, PNorm = 152.0123, GNorm = 0.1532, lr_0 = 2.4455e-04
Loss = 2.5575e-03, PNorm = 152.0177, GNorm = 0.2413, lr_0 = 2.4438e-04
Loss = 3.0915e-03, PNorm = 152.0234, GNorm = 0.0813, lr_0 = 2.4421e-04
Loss = 3.0410e-03, PNorm = 152.0269, GNorm = 0.1633, lr_0 = 2.4405e-04
Loss = 2.3276e-03, PNorm = 152.0307, GNorm = 0.1916, lr_0 = 2.4388e-04
Loss = 3.2367e-03, PNorm = 152.0369, GNorm = 0.0632, lr_0 = 2.4371e-04
Loss = 4.4219e-03, PNorm = 152.0405, GNorm = 0.2280, lr_0 = 2.4354e-04
Loss = 2.5294e-03, PNorm = 152.0448, GNorm = 0.1377, lr_0 = 2.4338e-04
Loss = 3.9104e-03, PNorm = 152.0531, GNorm = 0.1196, lr_0 = 2.4321e-04
Loss = 2.5655e-03, PNorm = 152.0588, GNorm = 0.2272, lr_0 = 2.4304e-04
Loss = 2.5839e-03, PNorm = 152.0657, GNorm = 0.0620, lr_0 = 2.4288e-04
Loss = 3.9730e-03, PNorm = 152.0721, GNorm = 0.1692, lr_0 = 2.4271e-04
Loss = 2.3938e-03, PNorm = 152.0771, GNorm = 0.1448, lr_0 = 2.4254e-04
Loss = 3.0656e-03, PNorm = 152.0834, GNorm = 0.1858, lr_0 = 2.4238e-04
Loss = 3.1276e-03, PNorm = 152.0865, GNorm = 0.1191, lr_0 = 2.4221e-04
Loss = 2.4551e-03, PNorm = 152.0917, GNorm = 0.1352, lr_0 = 2.4205e-04
Loss = 2.3447e-03, PNorm = 152.0977, GNorm = 0.1271, lr_0 = 2.4188e-04
Loss = 2.8422e-03, PNorm = 152.1048, GNorm = 0.0804, lr_0 = 2.4171e-04
Loss = 2.7881e-03, PNorm = 152.1097, GNorm = 0.1007, lr_0 = 2.4155e-04
Loss = 2.2762e-03, PNorm = 152.1143, GNorm = 0.0989, lr_0 = 2.4138e-04
Loss = 3.7680e-03, PNorm = 152.1167, GNorm = 0.1291, lr_0 = 2.4122e-04
Loss = 2.6516e-03, PNorm = 152.1236, GNorm = 0.1465, lr_0 = 2.4105e-04
Loss = 2.5625e-03, PNorm = 152.1283, GNorm = 0.1182, lr_0 = 2.4089e-04
Loss = 2.8036e-03, PNorm = 152.1347, GNorm = 0.0667, lr_0 = 2.4072e-04
Loss = 2.4659e-03, PNorm = 152.1370, GNorm = 0.1567, lr_0 = 2.4056e-04
Loss = 3.4193e-03, PNorm = 152.1419, GNorm = 0.3310, lr_0 = 2.4039e-04
Loss = 3.9020e-03, PNorm = 152.1472, GNorm = 0.0659, lr_0 = 2.4023e-04
Loss = 2.7285e-03, PNorm = 152.1538, GNorm = 0.2900, lr_0 = 2.4006e-04
Loss = 2.2993e-03, PNorm = 152.1592, GNorm = 0.0549, lr_0 = 2.3990e-04
Loss = 2.9939e-03, PNorm = 152.1650, GNorm = 0.2421, lr_0 = 2.3974e-04
Loss = 5.2336e-03, PNorm = 152.1719, GNorm = 0.4375, lr_0 = 2.3957e-04
Loss = 2.5897e-03, PNorm = 152.1809, GNorm = 0.2014, lr_0 = 2.3941e-04
Loss = 2.1444e-03, PNorm = 152.1840, GNorm = 0.1798, lr_0 = 2.3924e-04
Loss = 2.3755e-03, PNorm = 152.1871, GNorm = 0.0902, lr_0 = 2.3908e-04
Loss = 2.1944e-03, PNorm = 152.1915, GNorm = 0.1974, lr_0 = 2.3892e-04
Loss = 3.2409e-03, PNorm = 152.1981, GNorm = 0.2611, lr_0 = 2.3875e-04
Loss = 4.6650e-03, PNorm = 152.2032, GNorm = 0.1485, lr_0 = 2.3859e-04
Loss = 2.3887e-03, PNorm = 152.2104, GNorm = 0.0958, lr_0 = 2.3842e-04
Loss = 2.6852e-03, PNorm = 152.2139, GNorm = 0.2436, lr_0 = 2.3826e-04
Loss = 2.8193e-03, PNorm = 152.2184, GNorm = 0.1426, lr_0 = 2.3810e-04
Loss = 2.2283e-03, PNorm = 152.2230, GNorm = 0.1653, lr_0 = 2.3794e-04
Loss = 2.2453e-03, PNorm = 152.2281, GNorm = 0.1787, lr_0 = 2.3777e-04
Loss = 2.9028e-03, PNorm = 152.2334, GNorm = 0.1846, lr_0 = 2.3761e-04
Loss = 2.9365e-03, PNorm = 152.2389, GNorm = 0.1090, lr_0 = 2.3745e-04
Loss = 4.2724e-03, PNorm = 152.2422, GNorm = 0.0804, lr_0 = 2.3728e-04
Loss = 4.2636e-03, PNorm = 152.2444, GNorm = 0.2885, lr_0 = 2.3712e-04
Loss = 2.8616e-03, PNorm = 152.2502, GNorm = 0.2873, lr_0 = 2.3696e-04
Loss = 3.1999e-03, PNorm = 152.2578, GNorm = 0.2815, lr_0 = 2.3680e-04
Loss = 3.4209e-03, PNorm = 152.2658, GNorm = 0.1009, lr_0 = 2.3663e-04
Loss = 2.3057e-03, PNorm = 152.2707, GNorm = 0.3195, lr_0 = 2.3647e-04
Loss = 2.3901e-03, PNorm = 152.2795, GNorm = 0.2182, lr_0 = 2.3631e-04
Loss = 2.7221e-03, PNorm = 152.2861, GNorm = 0.0859, lr_0 = 2.3615e-04
Loss = 3.2177e-03, PNorm = 152.2927, GNorm = 0.1970, lr_0 = 2.3599e-04
Loss = 4.3569e-03, PNorm = 152.2986, GNorm = 0.0585, lr_0 = 2.3582e-04
Loss = 2.9861e-03, PNorm = 152.3039, GNorm = 0.1848, lr_0 = 2.3566e-04
Loss = 2.6936e-03, PNorm = 152.3098, GNorm = 0.0723, lr_0 = 2.3550e-04
Loss = 2.0178e-03, PNorm = 152.3157, GNorm = 0.0925, lr_0 = 2.3534e-04
Loss = 2.2242e-03, PNorm = 152.3198, GNorm = 0.2037, lr_0 = 2.3518e-04
Loss = 1.9655e-03, PNorm = 152.3239, GNorm = 0.0649, lr_0 = 2.3502e-04
Loss = 2.9396e-03, PNorm = 152.3280, GNorm = 0.2407, lr_0 = 2.3486e-04
Loss = 2.9697e-03, PNorm = 152.3333, GNorm = 0.1509, lr_0 = 2.3470e-04
Loss = 3.4973e-03, PNorm = 152.3385, GNorm = 0.1529, lr_0 = 2.3454e-04
Loss = 3.5138e-03, PNorm = 152.3431, GNorm = 0.1209, lr_0 = 2.3437e-04
Loss = 2.9440e-03, PNorm = 152.3480, GNorm = 0.0891, lr_0 = 2.3421e-04
Loss = 3.7346e-03, PNorm = 152.3507, GNorm = 0.2692, lr_0 = 2.3405e-04
Loss = 3.4872e-03, PNorm = 152.3596, GNorm = 0.1423, lr_0 = 2.3389e-04
Loss = 2.4971e-03, PNorm = 152.3645, GNorm = 0.2126, lr_0 = 2.3373e-04
Loss = 2.7888e-03, PNorm = 152.3705, GNorm = 0.1519, lr_0 = 2.3357e-04
Loss = 3.1239e-03, PNorm = 152.3767, GNorm = 0.0591, lr_0 = 2.3341e-04
Loss = 2.1705e-03, PNorm = 152.3837, GNorm = 0.0927, lr_0 = 2.3325e-04
Loss = 2.3616e-03, PNorm = 152.3890, GNorm = 0.1118, lr_0 = 2.3309e-04
Loss = 2.4229e-03, PNorm = 152.3963, GNorm = 0.1306, lr_0 = 2.3293e-04
Loss = 2.6559e-03, PNorm = 152.4048, GNorm = 0.0485, lr_0 = 2.3277e-04
Loss = 2.1117e-03, PNorm = 152.4146, GNorm = 0.0766, lr_0 = 2.3261e-04
Loss = 3.4244e-03, PNorm = 152.4206, GNorm = 0.1634, lr_0 = 2.3246e-04
Loss = 2.7373e-03, PNorm = 152.4258, GNorm = 0.1339, lr_0 = 2.3230e-04
Loss = 2.3736e-03, PNorm = 152.4323, GNorm = 0.2286, lr_0 = 2.3214e-04
Loss = 4.6784e-03, PNorm = 152.4387, GNorm = 0.3206, lr_0 = 2.3198e-04
Loss = 2.5402e-03, PNorm = 152.4457, GNorm = 0.1790, lr_0 = 2.3182e-04
Loss = 3.1599e-03, PNorm = 152.4549, GNorm = 0.1290, lr_0 = 2.3166e-04
Loss = 2.5074e-03, PNorm = 152.4632, GNorm = 0.1540, lr_0 = 2.3150e-04
Loss = 2.2662e-03, PNorm = 152.4699, GNorm = 0.2406, lr_0 = 2.3134e-04
Loss = 3.2103e-03, PNorm = 152.4749, GNorm = 0.2009, lr_0 = 2.3118e-04
Loss = 3.2016e-03, PNorm = 152.4767, GNorm = 0.0655, lr_0 = 2.3103e-04
Loss = 4.3476e-03, PNorm = 152.4803, GNorm = 0.1610, lr_0 = 2.3087e-04
Loss = 2.1107e-03, PNorm = 152.4873, GNorm = 0.0562, lr_0 = 2.3071e-04
Loss = 2.6103e-03, PNorm = 152.4943, GNorm = 0.1224, lr_0 = 2.3055e-04
Loss = 2.7235e-03, PNorm = 152.5015, GNorm = 0.1637, lr_0 = 2.3039e-04
Loss = 5.1573e-03, PNorm = 152.5102, GNorm = 0.2297, lr_0 = 2.3024e-04
Loss = 4.1308e-03, PNorm = 152.5132, GNorm = 0.0565, lr_0 = 2.3008e-04
Loss = 4.3501e-03, PNorm = 152.5177, GNorm = 0.1397, lr_0 = 2.2992e-04
Loss = 2.1052e-03, PNorm = 152.5233, GNorm = 0.1365, lr_0 = 2.2976e-04
Loss = 3.3684e-03, PNorm = 152.5288, GNorm = 0.4148, lr_0 = 2.2961e-04
Loss = 2.4533e-03, PNorm = 152.5354, GNorm = 0.0852, lr_0 = 2.2945e-04
Loss = 2.6757e-03, PNorm = 152.5411, GNorm = 0.0584, lr_0 = 2.2929e-04
Loss = 2.1435e-03, PNorm = 152.5475, GNorm = 0.2242, lr_0 = 2.2913e-04
Loss = 5.2042e-03, PNorm = 152.5576, GNorm = 0.1559, lr_0 = 2.2898e-04
Loss = 3.4236e-03, PNorm = 152.5656, GNorm = 0.2011, lr_0 = 2.2882e-04
Loss = 3.3404e-03, PNorm = 152.5694, GNorm = 0.1678, lr_0 = 2.2866e-04
Loss = 3.8605e-03, PNorm = 152.5758, GNorm = 0.3543, lr_0 = 2.2851e-04
Loss = 2.4368e-03, PNorm = 152.5829, GNorm = 0.0972, lr_0 = 2.2835e-04
Loss = 1.9289e-03, PNorm = 152.5871, GNorm = 0.1882, lr_0 = 2.2819e-04
Loss = 2.0658e-03, PNorm = 152.5927, GNorm = 0.0940, lr_0 = 2.2804e-04
Loss = 3.0894e-03, PNorm = 152.5998, GNorm = 0.1498, lr_0 = 2.2788e-04
Loss = 2.8053e-03, PNorm = 152.6039, GNorm = 0.0459, lr_0 = 2.2773e-04
Loss = 2.5519e-03, PNorm = 152.6081, GNorm = 0.1040, lr_0 = 2.2757e-04
Validation mae = 0.476718
Epoch 20
Loss = 2.6428e-03, PNorm = 152.6124, GNorm = 0.0804, lr_0 = 2.2741e-04
Loss = 4.0599e-03, PNorm = 152.6150, GNorm = 0.1859, lr_0 = 2.2726e-04
Loss = 2.6527e-03, PNorm = 152.6188, GNorm = 0.1171, lr_0 = 2.2710e-04
Loss = 2.1481e-03, PNorm = 152.6236, GNorm = 0.0972, lr_0 = 2.2695e-04
Loss = 4.1454e-03, PNorm = 152.6267, GNorm = 0.1543, lr_0 = 2.2679e-04
Loss = 2.6685e-03, PNorm = 152.6289, GNorm = 0.3594, lr_0 = 2.2664e-04
Loss = 3.0517e-03, PNorm = 152.6331, GNorm = 0.1867, lr_0 = 2.2648e-04
Loss = 3.0377e-03, PNorm = 152.6369, GNorm = 0.0619, lr_0 = 2.2632e-04
Loss = 3.0440e-03, PNorm = 152.6426, GNorm = 0.1655, lr_0 = 2.2617e-04
Loss = 2.7082e-03, PNorm = 152.6480, GNorm = 0.0478, lr_0 = 2.2601e-04
Loss = 1.9306e-03, PNorm = 152.6555, GNorm = 0.1873, lr_0 = 2.2586e-04
Loss = 3.8235e-03, PNorm = 152.6599, GNorm = 0.0595, lr_0 = 2.2571e-04
Loss = 3.2074e-03, PNorm = 152.6640, GNorm = 0.1000, lr_0 = 2.2555e-04
Loss = 2.6032e-03, PNorm = 152.6700, GNorm = 0.2012, lr_0 = 2.2540e-04
Loss = 1.6610e-03, PNorm = 152.6736, GNorm = 0.2244, lr_0 = 2.2524e-04
Loss = 2.3735e-03, PNorm = 152.6750, GNorm = 0.1563, lr_0 = 2.2509e-04
Loss = 2.3884e-03, PNorm = 152.6787, GNorm = 0.2504, lr_0 = 2.2493e-04
Loss = 3.1609e-03, PNorm = 152.6819, GNorm = 0.4595, lr_0 = 2.2478e-04
Loss = 3.4157e-03, PNorm = 152.6879, GNorm = 0.2876, lr_0 = 2.2463e-04
Loss = 1.9463e-03, PNorm = 152.6942, GNorm = 0.1567, lr_0 = 2.2447e-04
Loss = 3.2850e-03, PNorm = 152.6990, GNorm = 0.2286, lr_0 = 2.2432e-04
Loss = 2.3149e-03, PNorm = 152.7064, GNorm = 0.1872, lr_0 = 2.2416e-04
Loss = 3.1442e-03, PNorm = 152.7129, GNorm = 0.1704, lr_0 = 2.2401e-04
Loss = 2.4042e-03, PNorm = 152.7185, GNorm = 0.1809, lr_0 = 2.2386e-04
Loss = 2.6892e-03, PNorm = 152.7262, GNorm = 0.1445, lr_0 = 2.2370e-04
Loss = 2.6548e-03, PNorm = 152.7321, GNorm = 0.3268, lr_0 = 2.2355e-04
Loss = 1.8686e-03, PNorm = 152.7360, GNorm = 0.1465, lr_0 = 2.2340e-04
Loss = 2.8828e-03, PNorm = 152.7390, GNorm = 0.1414, lr_0 = 2.2324e-04
Loss = 2.3782e-03, PNorm = 152.7419, GNorm = 0.0795, lr_0 = 2.2309e-04
Loss = 3.0868e-03, PNorm = 152.7452, GNorm = 0.0912, lr_0 = 2.2294e-04
Loss = 2.2249e-03, PNorm = 152.7499, GNorm = 0.1308, lr_0 = 2.2279e-04
Loss = 2.8306e-03, PNorm = 152.7558, GNorm = 0.3063, lr_0 = 2.2263e-04
Loss = 2.6223e-03, PNorm = 152.7600, GNorm = 0.0884, lr_0 = 2.2248e-04
Loss = 3.8628e-03, PNorm = 152.7625, GNorm = 0.1135, lr_0 = 2.2233e-04
Loss = 2.2459e-03, PNorm = 152.7672, GNorm = 0.2179, lr_0 = 2.2218e-04
Loss = 1.8126e-03, PNorm = 152.7724, GNorm = 0.0631, lr_0 = 2.2202e-04
Loss = 1.8383e-03, PNorm = 152.7776, GNorm = 0.0861, lr_0 = 2.2187e-04
Loss = 2.9792e-03, PNorm = 152.7859, GNorm = 0.0396, lr_0 = 2.2172e-04
Loss = 2.0727e-03, PNorm = 152.7917, GNorm = 0.1784, lr_0 = 2.2157e-04
Loss = 5.2610e-03, PNorm = 152.7961, GNorm = 0.1283, lr_0 = 2.2142e-04
Loss = 2.2652e-03, PNorm = 152.8019, GNorm = 0.0865, lr_0 = 2.2126e-04
Loss = 2.7587e-03, PNorm = 152.8066, GNorm = 0.2784, lr_0 = 2.2111e-04
Loss = 3.8487e-03, PNorm = 152.8079, GNorm = 0.2881, lr_0 = 2.2096e-04
Loss = 2.6213e-03, PNorm = 152.8101, GNorm = 0.2327, lr_0 = 2.2081e-04
Loss = 2.5740e-03, PNorm = 152.8180, GNorm = 0.1861, lr_0 = 2.2066e-04
Loss = 2.4723e-03, PNorm = 152.8205, GNorm = 0.1999, lr_0 = 2.2051e-04
Loss = 2.0533e-03, PNorm = 152.8251, GNorm = 0.0509, lr_0 = 2.2036e-04
Loss = 3.3479e-03, PNorm = 152.8299, GNorm = 0.1312, lr_0 = 2.2021e-04
Loss = 2.1679e-03, PNorm = 152.8337, GNorm = 0.2872, lr_0 = 2.2005e-04
Loss = 3.4076e-03, PNorm = 152.8368, GNorm = 0.3848, lr_0 = 2.1990e-04
Loss = 1.8161e-03, PNorm = 152.8404, GNorm = 0.1119, lr_0 = 2.1975e-04
Loss = 2.7553e-03, PNorm = 152.8450, GNorm = 0.4188, lr_0 = 2.1960e-04
Loss = 2.1032e-03, PNorm = 152.8483, GNorm = 0.1325, lr_0 = 2.1945e-04
Loss = 2.3333e-03, PNorm = 152.8533, GNorm = 0.2223, lr_0 = 2.1930e-04
Loss = 1.8755e-03, PNorm = 152.8581, GNorm = 0.1064, lr_0 = 2.1915e-04
Loss = 2.8555e-03, PNorm = 152.8641, GNorm = 0.0682, lr_0 = 2.1900e-04
Loss = 2.4015e-03, PNorm = 152.8694, GNorm = 0.0566, lr_0 = 2.1885e-04
Loss = 4.2670e-03, PNorm = 152.8765, GNorm = 0.7057, lr_0 = 2.1870e-04
Loss = 2.6025e-03, PNorm = 152.8823, GNorm = 0.2120, lr_0 = 2.1855e-04
Loss = 1.9386e-03, PNorm = 152.8843, GNorm = 0.1108, lr_0 = 2.1840e-04
Loss = 2.1706e-03, PNorm = 152.8890, GNorm = 0.1413, lr_0 = 2.1825e-04
Loss = 2.4656e-03, PNorm = 152.8933, GNorm = 0.1686, lr_0 = 2.1810e-04
Loss = 2.8895e-03, PNorm = 152.8981, GNorm = 0.1322, lr_0 = 2.1795e-04
Loss = 1.9508e-03, PNorm = 152.8993, GNorm = 0.2042, lr_0 = 2.1780e-04
Loss = 1.8133e-03, PNorm = 152.9009, GNorm = 0.1037, lr_0 = 2.1765e-04
Loss = 4.0595e-03, PNorm = 152.9040, GNorm = 0.2982, lr_0 = 2.1751e-04
Loss = 2.8476e-03, PNorm = 152.9078, GNorm = 0.2162, lr_0 = 2.1736e-04
Loss = 1.9157e-03, PNorm = 152.9139, GNorm = 0.1214, lr_0 = 2.1721e-04
Loss = 4.5658e-03, PNorm = 152.9203, GNorm = 0.1668, lr_0 = 2.1706e-04
Loss = 2.5014e-03, PNorm = 152.9264, GNorm = 0.1437, lr_0 = 2.1691e-04
Loss = 1.8691e-03, PNorm = 152.9327, GNorm = 0.2277, lr_0 = 2.1676e-04
Loss = 3.6539e-03, PNorm = 152.9350, GNorm = 0.1482, lr_0 = 2.1661e-04
Loss = 2.1208e-03, PNorm = 152.9393, GNorm = 0.1095, lr_0 = 2.1646e-04
Loss = 2.0432e-03, PNorm = 152.9423, GNorm = 0.1012, lr_0 = 2.1632e-04
Loss = 1.7103e-03, PNorm = 152.9456, GNorm = 0.1490, lr_0 = 2.1617e-04
Loss = 2.2403e-03, PNorm = 152.9513, GNorm = 0.0966, lr_0 = 2.1602e-04
Loss = 1.8425e-03, PNorm = 152.9576, GNorm = 0.1529, lr_0 = 2.1587e-04
Loss = 1.9650e-03, PNorm = 152.9621, GNorm = 0.0898, lr_0 = 2.1572e-04
Loss = 2.3629e-03, PNorm = 152.9685, GNorm = 0.1510, lr_0 = 2.1558e-04
Loss = 1.7239e-03, PNorm = 152.9743, GNorm = 0.1142, lr_0 = 2.1543e-04
Loss = 2.3827e-03, PNorm = 152.9782, GNorm = 0.0952, lr_0 = 2.1528e-04
Loss = 2.3301e-03, PNorm = 152.9837, GNorm = 0.1499, lr_0 = 2.1513e-04
Loss = 1.6750e-03, PNorm = 152.9888, GNorm = 0.2015, lr_0 = 2.1499e-04
Loss = 1.7894e-03, PNorm = 152.9895, GNorm = 0.0692, lr_0 = 2.1484e-04
Loss = 2.1574e-03, PNorm = 152.9951, GNorm = 0.0880, lr_0 = 2.1469e-04
Loss = 1.5641e-03, PNorm = 153.0004, GNorm = 0.0878, lr_0 = 2.1454e-04
Loss = 3.9302e-03, PNorm = 153.0078, GNorm = 0.1257, lr_0 = 2.1440e-04
Loss = 2.0695e-03, PNorm = 153.0129, GNorm = 0.1925, lr_0 = 2.1425e-04
Loss = 2.4378e-03, PNorm = 153.0171, GNorm = 0.0355, lr_0 = 2.1410e-04
Loss = 1.8717e-03, PNorm = 153.0225, GNorm = 0.2216, lr_0 = 2.1396e-04
Loss = 4.0124e-03, PNorm = 153.0247, GNorm = 0.2610, lr_0 = 2.1381e-04
Loss = 3.7717e-03, PNorm = 153.0306, GNorm = 0.1296, lr_0 = 2.1366e-04
Loss = 2.5048e-03, PNorm = 153.0375, GNorm = 0.1159, lr_0 = 2.1352e-04
Loss = 2.4745e-03, PNorm = 153.0450, GNorm = 0.0924, lr_0 = 2.1337e-04
Loss = 4.2188e-03, PNorm = 153.0531, GNorm = 0.2406, lr_0 = 2.1323e-04
Loss = 1.8203e-03, PNorm = 153.0586, GNorm = 0.1538, lr_0 = 2.1308e-04
Loss = 3.3479e-03, PNorm = 153.0612, GNorm = 0.2150, lr_0 = 2.1293e-04
Loss = 3.3473e-03, PNorm = 153.0651, GNorm = 0.0692, lr_0 = 2.1279e-04
Loss = 1.9424e-03, PNorm = 153.0701, GNorm = 0.1278, lr_0 = 2.1264e-04
Loss = 2.2321e-03, PNorm = 153.0746, GNorm = 0.0517, lr_0 = 2.1250e-04
Loss = 5.1324e-03, PNorm = 153.0800, GNorm = 0.0976, lr_0 = 2.1235e-04
Loss = 3.9852e-03, PNorm = 153.0838, GNorm = 0.1782, lr_0 = 2.1221e-04
Loss = 3.7482e-03, PNorm = 153.0886, GNorm = 0.1463, lr_0 = 2.1206e-04
Loss = 2.5243e-03, PNorm = 153.0943, GNorm = 0.1668, lr_0 = 2.1191e-04
Loss = 1.9796e-03, PNorm = 153.1007, GNorm = 0.2420, lr_0 = 2.1177e-04
Loss = 3.7602e-03, PNorm = 153.1034, GNorm = 0.1575, lr_0 = 2.1162e-04
Loss = 1.6973e-03, PNorm = 153.1077, GNorm = 0.0864, lr_0 = 2.1148e-04
Loss = 2.3444e-03, PNorm = 153.1138, GNorm = 0.0722, lr_0 = 2.1133e-04
Loss = 2.0720e-03, PNorm = 153.1201, GNorm = 0.0617, lr_0 = 2.1119e-04
Loss = 2.9386e-03, PNorm = 153.1253, GNorm = 0.1605, lr_0 = 2.1104e-04
Loss = 2.5652e-03, PNorm = 153.1288, GNorm = 0.0567, lr_0 = 2.1090e-04
Loss = 2.6901e-03, PNorm = 153.1358, GNorm = 0.3548, lr_0 = 2.1076e-04
Loss = 2.9266e-03, PNorm = 153.1422, GNorm = 0.3421, lr_0 = 2.1061e-04
Loss = 2.6955e-03, PNorm = 153.1460, GNorm = 0.1159, lr_0 = 2.1047e-04
Loss = 2.0804e-03, PNorm = 153.1508, GNorm = 0.1239, lr_0 = 2.1032e-04
Loss = 3.9313e-03, PNorm = 153.1538, GNorm = 0.2018, lr_0 = 2.1018e-04
Loss = 2.1337e-03, PNorm = 153.1599, GNorm = 0.1999, lr_0 = 2.1003e-04
Loss = 2.2635e-03, PNorm = 153.1644, GNorm = 0.1155, lr_0 = 2.0989e-04
Loss = 3.1792e-03, PNorm = 153.1702, GNorm = 0.2105, lr_0 = 2.0975e-04
Loss = 1.8634e-03, PNorm = 153.1765, GNorm = 0.0435, lr_0 = 2.0960e-04
Validation mae = 0.477042
Epoch 21
Loss = 3.0514e-03, PNorm = 153.1801, GNorm = 0.1371, lr_0 = 2.0946e-04
Loss = 1.9166e-03, PNorm = 153.1852, GNorm = 0.2030, lr_0 = 2.0932e-04
Loss = 2.3885e-03, PNorm = 153.1890, GNorm = 0.2242, lr_0 = 2.0917e-04
Loss = 2.0639e-03, PNorm = 153.1918, GNorm = 0.1752, lr_0 = 2.0903e-04
Loss = 1.5432e-03, PNorm = 153.1948, GNorm = 0.1004, lr_0 = 2.0889e-04
Loss = 2.1771e-03, PNorm = 153.1979, GNorm = 0.1806, lr_0 = 2.0874e-04
Loss = 2.8358e-03, PNorm = 153.2007, GNorm = 0.1080, lr_0 = 2.0860e-04
Loss = 1.9678e-03, PNorm = 153.2031, GNorm = 0.0976, lr_0 = 2.0846e-04
Loss = 3.1991e-03, PNorm = 153.2063, GNorm = 0.1441, lr_0 = 2.0831e-04
Loss = 1.8355e-03, PNorm = 153.2114, GNorm = 0.0393, lr_0 = 2.0817e-04
Loss = 2.0661e-03, PNorm = 153.2157, GNorm = 0.0530, lr_0 = 2.0803e-04
Loss = 2.2078e-03, PNorm = 153.2233, GNorm = 0.2031, lr_0 = 2.0789e-04
Loss = 2.5927e-03, PNorm = 153.2272, GNorm = 0.1210, lr_0 = 2.0774e-04
Loss = 2.6088e-03, PNorm = 153.2312, GNorm = 0.2842, lr_0 = 2.0760e-04
Loss = 3.1728e-03, PNorm = 153.2346, GNorm = 0.1234, lr_0 = 2.0746e-04
Loss = 2.8314e-03, PNorm = 153.2372, GNorm = 0.1583, lr_0 = 2.0732e-04
Loss = 2.2311e-03, PNorm = 153.2402, GNorm = 0.1137, lr_0 = 2.0718e-04
Loss = 1.9138e-03, PNorm = 153.2443, GNorm = 0.0816, lr_0 = 2.0703e-04
Loss = 1.9906e-03, PNorm = 153.2487, GNorm = 0.1275, lr_0 = 2.0689e-04
Loss = 1.6602e-03, PNorm = 153.2526, GNorm = 0.1309, lr_0 = 2.0675e-04
Loss = 1.8233e-03, PNorm = 153.2539, GNorm = 0.0335, lr_0 = 2.0661e-04
Loss = 1.7791e-03, PNorm = 153.2567, GNorm = 0.0884, lr_0 = 2.0647e-04
Loss = 3.0330e-03, PNorm = 153.2607, GNorm = 0.1293, lr_0 = 2.0633e-04
Loss = 4.2298e-03, PNorm = 153.2659, GNorm = 0.1746, lr_0 = 2.0618e-04
Loss = 1.7765e-03, PNorm = 153.2723, GNorm = 0.0805, lr_0 = 2.0604e-04
Loss = 1.6023e-03, PNorm = 153.2764, GNorm = 0.1634, lr_0 = 2.0590e-04
Loss = 3.2711e-03, PNorm = 153.2769, GNorm = 0.1492, lr_0 = 2.0576e-04
Loss = 2.1819e-03, PNorm = 153.2790, GNorm = 0.0505, lr_0 = 2.0562e-04
Loss = 2.4181e-03, PNorm = 153.2834, GNorm = 0.2132, lr_0 = 2.0548e-04
Loss = 2.0880e-03, PNorm = 153.2898, GNorm = 0.0984, lr_0 = 2.0534e-04
Loss = 2.0826e-03, PNorm = 153.2975, GNorm = 0.1205, lr_0 = 2.0520e-04
Loss = 4.0880e-03, PNorm = 153.3014, GNorm = 0.0909, lr_0 = 2.0506e-04
Loss = 2.6018e-03, PNorm = 153.3045, GNorm = 0.1162, lr_0 = 2.0492e-04
Loss = 2.4301e-03, PNorm = 153.3073, GNorm = 0.1004, lr_0 = 2.0478e-04
Loss = 1.8395e-03, PNorm = 153.3135, GNorm = 0.0529, lr_0 = 2.0464e-04
Loss = 2.7533e-03, PNorm = 153.3180, GNorm = 0.1001, lr_0 = 2.0450e-04
Loss = 1.8031e-03, PNorm = 153.3220, GNorm = 0.1580, lr_0 = 2.0436e-04
Loss = 2.1431e-03, PNorm = 153.3263, GNorm = 0.2288, lr_0 = 2.0422e-04
Loss = 1.8413e-03, PNorm = 153.3296, GNorm = 0.1366, lr_0 = 2.0408e-04
Loss = 1.5524e-03, PNorm = 153.3312, GNorm = 0.3597, lr_0 = 2.0394e-04
Loss = 1.7777e-03, PNorm = 153.3335, GNorm = 0.1007, lr_0 = 2.0380e-04
Loss = 2.3407e-03, PNorm = 153.3375, GNorm = 0.1652, lr_0 = 2.0366e-04
Loss = 2.9966e-03, PNorm = 153.3441, GNorm = 0.1859, lr_0 = 2.0352e-04
Loss = 2.8793e-03, PNorm = 153.3494, GNorm = 0.1394, lr_0 = 2.0338e-04
Loss = 1.9958e-03, PNorm = 153.3526, GNorm = 0.0510, lr_0 = 2.0324e-04
Loss = 2.0524e-03, PNorm = 153.3564, GNorm = 0.0694, lr_0 = 2.0310e-04
Loss = 1.7037e-03, PNorm = 153.3583, GNorm = 0.1169, lr_0 = 2.0296e-04
Loss = 1.6490e-03, PNorm = 153.3632, GNorm = 0.0533, lr_0 = 2.0282e-04
Loss = 2.2602e-03, PNorm = 153.3655, GNorm = 0.0386, lr_0 = 2.0268e-04
Loss = 1.5983e-03, PNorm = 153.3712, GNorm = 0.0420, lr_0 = 2.0254e-04
Loss = 1.5358e-03, PNorm = 153.3759, GNorm = 0.0447, lr_0 = 2.0240e-04
Loss = 1.7722e-03, PNorm = 153.3803, GNorm = 0.0851, lr_0 = 2.0227e-04
Loss = 4.0513e-03, PNorm = 153.3849, GNorm = 0.1975, lr_0 = 2.0213e-04
Loss = 1.5928e-03, PNorm = 153.3876, GNorm = 0.1231, lr_0 = 2.0199e-04
Loss = 1.8602e-03, PNorm = 153.3917, GNorm = 0.1270, lr_0 = 2.0185e-04
Loss = 1.9158e-03, PNorm = 153.3927, GNorm = 0.1507, lr_0 = 2.0171e-04
Loss = 1.8295e-03, PNorm = 153.3952, GNorm = 0.1171, lr_0 = 2.0157e-04
Loss = 3.4893e-03, PNorm = 153.3963, GNorm = 0.0580, lr_0 = 2.0144e-04
Loss = 4.3061e-03, PNorm = 153.4013, GNorm = 0.2510, lr_0 = 2.0130e-04
Loss = 1.6655e-03, PNorm = 153.4067, GNorm = 0.0483, lr_0 = 2.0116e-04
Loss = 2.1104e-03, PNorm = 153.4101, GNorm = 0.1131, lr_0 = 2.0102e-04
Loss = 1.7412e-03, PNorm = 153.4125, GNorm = 0.1624, lr_0 = 2.0088e-04
Loss = 2.0508e-03, PNorm = 153.4158, GNorm = 0.1275, lr_0 = 2.0075e-04
Loss = 3.5163e-03, PNorm = 153.4206, GNorm = 0.1374, lr_0 = 2.0061e-04
Loss = 1.8038e-03, PNorm = 153.4242, GNorm = 0.1064, lr_0 = 2.0047e-04
Loss = 1.9334e-03, PNorm = 153.4282, GNorm = 0.1033, lr_0 = 2.0033e-04
Loss = 2.1375e-03, PNorm = 153.4342, GNorm = 0.1785, lr_0 = 2.0020e-04
Loss = 3.2438e-03, PNorm = 153.4410, GNorm = 0.1607, lr_0 = 2.0006e-04
Loss = 3.3010e-03, PNorm = 153.4434, GNorm = 0.2287, lr_0 = 1.9992e-04
Loss = 2.4217e-03, PNorm = 153.4446, GNorm = 0.0642, lr_0 = 1.9979e-04
Loss = 1.7779e-03, PNorm = 153.4513, GNorm = 0.3591, lr_0 = 1.9965e-04
Loss = 2.2486e-03, PNorm = 153.4567, GNorm = 0.2474, lr_0 = 1.9951e-04
Loss = 1.4344e-03, PNorm = 153.4613, GNorm = 0.0828, lr_0 = 1.9938e-04
Loss = 2.7978e-03, PNorm = 153.4626, GNorm = 0.1602, lr_0 = 1.9924e-04
Loss = 3.1038e-03, PNorm = 153.4649, GNorm = 0.2002, lr_0 = 1.9910e-04
Loss = 1.7502e-03, PNorm = 153.4707, GNorm = 0.1371, lr_0 = 1.9897e-04
Loss = 2.0539e-03, PNorm = 153.4785, GNorm = 0.0749, lr_0 = 1.9883e-04
Loss = 2.1974e-03, PNorm = 153.4831, GNorm = 0.1431, lr_0 = 1.9869e-04
Loss = 1.7243e-03, PNorm = 153.4859, GNorm = 0.0915, lr_0 = 1.9856e-04
Loss = 6.4221e-03, PNorm = 153.4891, GNorm = 0.1852, lr_0 = 1.9842e-04
Loss = 1.9846e-03, PNorm = 153.4954, GNorm = 0.1364, lr_0 = 1.9829e-04
Loss = 1.9387e-03, PNorm = 153.4984, GNorm = 0.1110, lr_0 = 1.9815e-04
Loss = 1.9847e-03, PNorm = 153.5022, GNorm = 0.2174, lr_0 = 1.9801e-04
Loss = 2.1392e-03, PNorm = 153.5081, GNorm = 0.4376, lr_0 = 1.9788e-04
Loss = 3.0173e-03, PNorm = 153.5126, GNorm = 0.1191, lr_0 = 1.9774e-04
Loss = 2.5000e-03, PNorm = 153.5178, GNorm = 0.0480, lr_0 = 1.9761e-04
Loss = 2.6553e-03, PNorm = 153.5223, GNorm = 0.1123, lr_0 = 1.9747e-04
Loss = 2.4815e-03, PNorm = 153.5260, GNorm = 0.0834, lr_0 = 1.9734e-04
Loss = 2.7061e-03, PNorm = 153.5303, GNorm = 0.2081, lr_0 = 1.9720e-04
Loss = 2.8051e-03, PNorm = 153.5365, GNorm = 0.1762, lr_0 = 1.9707e-04
Loss = 1.6298e-03, PNorm = 153.5407, GNorm = 0.0918, lr_0 = 1.9693e-04
Loss = 1.8025e-03, PNorm = 153.5456, GNorm = 0.0985, lr_0 = 1.9680e-04
Loss = 2.0793e-03, PNorm = 153.5499, GNorm = 0.1269, lr_0 = 1.9666e-04
Loss = 3.2853e-03, PNorm = 153.5524, GNorm = 0.0929, lr_0 = 1.9653e-04
Loss = 1.9426e-03, PNorm = 153.5560, GNorm = 0.0539, lr_0 = 1.9639e-04
Loss = 1.7748e-03, PNorm = 153.5625, GNorm = 0.0591, lr_0 = 1.9626e-04
Loss = 1.7910e-03, PNorm = 153.5660, GNorm = 0.2215, lr_0 = 1.9612e-04
Loss = 2.1652e-03, PNorm = 153.5687, GNorm = 0.0574, lr_0 = 1.9599e-04
Loss = 3.0743e-03, PNorm = 153.5737, GNorm = 0.1096, lr_0 = 1.9585e-04
Loss = 4.1178e-03, PNorm = 153.5786, GNorm = 0.1646, lr_0 = 1.9572e-04
Loss = 4.2732e-03, PNorm = 153.5828, GNorm = 0.0562, lr_0 = 1.9559e-04
Loss = 1.9203e-03, PNorm = 153.5891, GNorm = 0.1688, lr_0 = 1.9545e-04
Loss = 1.4610e-03, PNorm = 153.5926, GNorm = 0.1717, lr_0 = 1.9532e-04
Loss = 1.8172e-03, PNorm = 153.5957, GNorm = 0.1724, lr_0 = 1.9518e-04
Loss = 2.0512e-03, PNorm = 153.5983, GNorm = 0.0916, lr_0 = 1.9505e-04
Loss = 2.3480e-03, PNorm = 153.6057, GNorm = 0.0708, lr_0 = 1.9492e-04
Loss = 2.4686e-03, PNorm = 153.6119, GNorm = 0.1095, lr_0 = 1.9478e-04
Loss = 1.6662e-03, PNorm = 153.6172, GNorm = 0.0513, lr_0 = 1.9465e-04
Loss = 4.4265e-03, PNorm = 153.6210, GNorm = 0.1061, lr_0 = 1.9452e-04
Loss = 2.6479e-03, PNorm = 153.6233, GNorm = 0.1829, lr_0 = 1.9438e-04
Loss = 2.4059e-03, PNorm = 153.6258, GNorm = 0.1152, lr_0 = 1.9425e-04
Loss = 3.7469e-03, PNorm = 153.6315, GNorm = 0.1931, lr_0 = 1.9412e-04
Loss = 5.1170e-03, PNorm = 153.6343, GNorm = 0.2849, lr_0 = 1.9398e-04
Loss = 1.8398e-03, PNorm = 153.6384, GNorm = 0.2146, lr_0 = 1.9385e-04
Loss = 2.3730e-03, PNorm = 153.6430, GNorm = 0.1372, lr_0 = 1.9372e-04
Loss = 2.9408e-03, PNorm = 153.6480, GNorm = 0.1316, lr_0 = 1.9359e-04
Loss = 1.5488e-03, PNorm = 153.6526, GNorm = 0.1822, lr_0 = 1.9345e-04
Loss = 1.6649e-03, PNorm = 153.6588, GNorm = 0.1710, lr_0 = 1.9332e-04
Loss = 1.6725e-03, PNorm = 153.6637, GNorm = 0.1790, lr_0 = 1.9319e-04
Loss = 1.8756e-03, PNorm = 153.6675, GNorm = 0.1225, lr_0 = 1.9306e-04
Validation mae = 0.475571
Epoch 22
Loss = 2.2566e-03, PNorm = 153.6708, GNorm = 0.1717, lr_0 = 1.9292e-04
Loss = 1.7120e-03, PNorm = 153.6742, GNorm = 0.1065, lr_0 = 1.9279e-04
Loss = 1.7283e-03, PNorm = 153.6778, GNorm = 0.0423, lr_0 = 1.9266e-04
Loss = 2.1121e-03, PNorm = 153.6811, GNorm = 0.2435, lr_0 = 1.9253e-04
Loss = 2.7863e-03, PNorm = 153.6822, GNorm = 0.2302, lr_0 = 1.9240e-04
Loss = 2.3501e-03, PNorm = 153.6865, GNorm = 0.1201, lr_0 = 1.9226e-04
Loss = 2.4431e-03, PNorm = 153.6889, GNorm = 0.2373, lr_0 = 1.9213e-04
Loss = 1.9346e-03, PNorm = 153.6935, GNorm = 0.2583, lr_0 = 1.9200e-04
Loss = 3.0852e-03, PNorm = 153.6960, GNorm = 0.1427, lr_0 = 1.9187e-04
Loss = 1.8676e-03, PNorm = 153.6990, GNorm = 0.1675, lr_0 = 1.9174e-04
Loss = 1.9519e-03, PNorm = 153.7023, GNorm = 0.1641, lr_0 = 1.9161e-04
Loss = 1.9818e-03, PNorm = 153.7070, GNorm = 0.0440, lr_0 = 1.9148e-04
Loss = 1.4401e-03, PNorm = 153.7092, GNorm = 0.0712, lr_0 = 1.9134e-04
Loss = 3.0420e-03, PNorm = 153.7116, GNorm = 0.2489, lr_0 = 1.9121e-04
Loss = 1.9701e-03, PNorm = 153.7140, GNorm = 0.1296, lr_0 = 1.9108e-04
Loss = 1.6298e-03, PNorm = 153.7182, GNorm = 0.0488, lr_0 = 1.9095e-04
Loss = 1.5558e-03, PNorm = 153.7202, GNorm = 0.1379, lr_0 = 1.9082e-04
Loss = 2.7689e-03, PNorm = 153.7225, GNorm = 0.0996, lr_0 = 1.9069e-04
Loss = 1.6113e-03, PNorm = 153.7245, GNorm = 0.1855, lr_0 = 1.9056e-04
Loss = 2.9182e-03, PNorm = 153.7279, GNorm = 0.1187, lr_0 = 1.9043e-04
Loss = 2.7208e-03, PNorm = 153.7305, GNorm = 0.0885, lr_0 = 1.9030e-04
Loss = 1.6644e-03, PNorm = 153.7341, GNorm = 0.1371, lr_0 = 1.9017e-04
Loss = 1.4626e-03, PNorm = 153.7385, GNorm = 0.2664, lr_0 = 1.9004e-04
Loss = 1.9699e-03, PNorm = 153.7421, GNorm = 0.0698, lr_0 = 1.8991e-04
Loss = 2.4298e-03, PNorm = 153.7439, GNorm = 0.1125, lr_0 = 1.8978e-04
Loss = 1.8411e-03, PNorm = 153.7476, GNorm = 0.1464, lr_0 = 1.8965e-04
Loss = 2.0032e-03, PNorm = 153.7495, GNorm = 0.0984, lr_0 = 1.8952e-04
Loss = 2.4316e-03, PNorm = 153.7531, GNorm = 0.0676, lr_0 = 1.8939e-04
Loss = 1.3878e-03, PNorm = 153.7533, GNorm = 0.0763, lr_0 = 1.8926e-04
Loss = 2.0174e-03, PNorm = 153.7560, GNorm = 0.1448, lr_0 = 1.8913e-04
Loss = 2.5295e-03, PNorm = 153.7595, GNorm = 0.0747, lr_0 = 1.8900e-04
Loss = 1.5414e-03, PNorm = 153.7607, GNorm = 0.1325, lr_0 = 1.8887e-04
Loss = 5.5538e-03, PNorm = 153.7623, GNorm = 0.2213, lr_0 = 1.8874e-04
Loss = 2.0640e-03, PNorm = 153.7672, GNorm = 0.0901, lr_0 = 1.8861e-04
Loss = 2.3703e-03, PNorm = 153.7731, GNorm = 0.1586, lr_0 = 1.8848e-04
Loss = 1.8056e-03, PNorm = 153.7772, GNorm = 0.1568, lr_0 = 1.8835e-04
Loss = 1.5604e-03, PNorm = 153.7818, GNorm = 0.0652, lr_0 = 1.8822e-04
Loss = 1.4573e-03, PNorm = 153.7861, GNorm = 0.1519, lr_0 = 1.8809e-04
Loss = 1.1074e-03, PNorm = 153.7907, GNorm = 0.0738, lr_0 = 1.8797e-04
Loss = 2.5874e-03, PNorm = 153.7951, GNorm = 0.0355, lr_0 = 1.8784e-04
Loss = 1.3792e-03, PNorm = 153.7965, GNorm = 0.0794, lr_0 = 1.8771e-04
Loss = 1.7882e-03, PNorm = 153.8001, GNorm = 0.2206, lr_0 = 1.8758e-04
Loss = 1.5008e-03, PNorm = 153.8037, GNorm = 0.1336, lr_0 = 1.8745e-04
Loss = 1.8645e-03, PNorm = 153.8070, GNorm = 0.1359, lr_0 = 1.8732e-04
Loss = 1.3044e-03, PNorm = 153.8103, GNorm = 0.1148, lr_0 = 1.8719e-04
Loss = 1.5301e-03, PNorm = 153.8132, GNorm = 0.0406, lr_0 = 1.8707e-04
Loss = 3.4229e-03, PNorm = 153.8186, GNorm = 0.2116, lr_0 = 1.8694e-04
Loss = 2.4244e-03, PNorm = 153.8231, GNorm = 0.0670, lr_0 = 1.8681e-04
Loss = 1.1122e-03, PNorm = 153.8270, GNorm = 0.1118, lr_0 = 1.8668e-04
Loss = 2.7868e-03, PNorm = 153.8297, GNorm = 0.0805, lr_0 = 1.8655e-04
Loss = 1.4711e-03, PNorm = 153.8333, GNorm = 0.0515, lr_0 = 1.8643e-04
Loss = 1.4502e-03, PNorm = 153.8367, GNorm = 0.0954, lr_0 = 1.8630e-04
Loss = 1.5173e-03, PNorm = 153.8401, GNorm = 0.0805, lr_0 = 1.8617e-04
Loss = 1.6544e-03, PNorm = 153.8445, GNorm = 0.0872, lr_0 = 1.8604e-04
Loss = 1.8083e-03, PNorm = 153.8475, GNorm = 0.1176, lr_0 = 1.8592e-04
Loss = 1.8871e-03, PNorm = 153.8502, GNorm = 0.0622, lr_0 = 1.8579e-04
Loss = 2.4887e-03, PNorm = 153.8549, GNorm = 0.1259, lr_0 = 1.8566e-04
Loss = 1.5004e-03, PNorm = 153.8592, GNorm = 0.0931, lr_0 = 1.8553e-04
Loss = 1.9580e-03, PNorm = 153.8636, GNorm = 0.0763, lr_0 = 1.8541e-04
Loss = 1.3264e-03, PNorm = 153.8684, GNorm = 0.1416, lr_0 = 1.8528e-04
Loss = 3.0271e-03, PNorm = 153.8730, GNorm = 0.1033, lr_0 = 1.8515e-04
Loss = 3.0433e-03, PNorm = 153.8764, GNorm = 0.0635, lr_0 = 1.8503e-04
Loss = 2.1839e-03, PNorm = 153.8809, GNorm = 0.2743, lr_0 = 1.8490e-04
Loss = 2.1460e-03, PNorm = 153.8840, GNorm = 0.1453, lr_0 = 1.8477e-04
Loss = 2.4084e-03, PNorm = 153.8884, GNorm = 0.1420, lr_0 = 1.8465e-04
Loss = 1.3322e-03, PNorm = 153.8929, GNorm = 0.0940, lr_0 = 1.8452e-04
Loss = 2.6343e-03, PNorm = 153.8953, GNorm = 0.3364, lr_0 = 1.8439e-04
Loss = 4.1323e-03, PNorm = 153.9019, GNorm = 0.1051, lr_0 = 1.8427e-04
Loss = 2.5560e-03, PNorm = 153.9044, GNorm = 0.0940, lr_0 = 1.8414e-04
Loss = 1.8432e-03, PNorm = 153.9072, GNorm = 0.1107, lr_0 = 1.8401e-04
Loss = 1.7751e-03, PNorm = 153.9120, GNorm = 0.0905, lr_0 = 1.8389e-04
Loss = 1.6408e-03, PNorm = 153.9155, GNorm = 0.0642, lr_0 = 1.8376e-04
Loss = 2.0200e-03, PNorm = 153.9174, GNorm = 0.1637, lr_0 = 1.8364e-04
Loss = 1.4794e-03, PNorm = 153.9194, GNorm = 0.1304, lr_0 = 1.8351e-04
Loss = 1.5575e-03, PNorm = 153.9206, GNorm = 0.0446, lr_0 = 1.8338e-04
Loss = 1.7280e-03, PNorm = 153.9233, GNorm = 0.1150, lr_0 = 1.8326e-04
Loss = 5.1549e-03, PNorm = 153.9251, GNorm = 0.1325, lr_0 = 1.8313e-04
Loss = 3.2212e-03, PNorm = 153.9287, GNorm = 0.1063, lr_0 = 1.8301e-04
Loss = 2.3170e-03, PNorm = 153.9333, GNorm = 0.1684, lr_0 = 1.8288e-04
Loss = 1.7452e-03, PNorm = 153.9389, GNorm = 0.0522, lr_0 = 1.8276e-04
Loss = 1.5577e-03, PNorm = 153.9427, GNorm = 0.1344, lr_0 = 1.8263e-04
Loss = 1.4752e-03, PNorm = 153.9455, GNorm = 0.1024, lr_0 = 1.8251e-04
Loss = 1.4923e-03, PNorm = 153.9488, GNorm = 0.0473, lr_0 = 1.8238e-04
Loss = 2.3155e-03, PNorm = 153.9521, GNorm = 0.1853, lr_0 = 1.8226e-04
Loss = 4.5696e-03, PNorm = 153.9573, GNorm = 0.1048, lr_0 = 1.8213e-04
Loss = 1.8641e-03, PNorm = 153.9599, GNorm = 0.1551, lr_0 = 1.8201e-04
Loss = 4.1141e-03, PNorm = 153.9647, GNorm = 0.1494, lr_0 = 1.8188e-04
Loss = 2.9673e-03, PNorm = 153.9666, GNorm = 0.1317, lr_0 = 1.8176e-04
Loss = 1.5776e-03, PNorm = 153.9701, GNorm = 0.0447, lr_0 = 1.8163e-04
Loss = 1.4654e-03, PNorm = 153.9734, GNorm = 0.1413, lr_0 = 1.8151e-04
Loss = 1.9944e-03, PNorm = 153.9781, GNorm = 0.1487, lr_0 = 1.8138e-04
Loss = 1.3578e-03, PNorm = 153.9827, GNorm = 0.0401, lr_0 = 1.8126e-04
Loss = 1.6407e-03, PNorm = 153.9847, GNorm = 0.1116, lr_0 = 1.8114e-04
Loss = 2.1734e-03, PNorm = 153.9860, GNorm = 0.0729, lr_0 = 1.8101e-04
Loss = 1.8027e-03, PNorm = 153.9891, GNorm = 0.0981, lr_0 = 1.8089e-04
Loss = 1.3371e-03, PNorm = 153.9915, GNorm = 0.1366, lr_0 = 1.8076e-04
Loss = 2.0796e-03, PNorm = 153.9947, GNorm = 0.2143, lr_0 = 1.8064e-04
Loss = 1.5134e-03, PNorm = 153.9992, GNorm = 0.1629, lr_0 = 1.8052e-04
Loss = 1.3687e-03, PNorm = 154.0026, GNorm = 0.1408, lr_0 = 1.8039e-04
Loss = 2.8631e-03, PNorm = 154.0027, GNorm = 0.1315, lr_0 = 1.8027e-04
Loss = 1.7770e-03, PNorm = 154.0057, GNorm = 0.2168, lr_0 = 1.8015e-04
Loss = 2.8065e-03, PNorm = 154.0091, GNorm = 0.1856, lr_0 = 1.8002e-04
Loss = 1.5908e-03, PNorm = 154.0137, GNorm = 0.1999, lr_0 = 1.7990e-04
Loss = 1.5097e-03, PNorm = 154.0157, GNorm = 0.1103, lr_0 = 1.7978e-04
Loss = 2.8823e-03, PNorm = 154.0172, GNorm = 0.4371, lr_0 = 1.7965e-04
Loss = 1.6000e-03, PNorm = 154.0186, GNorm = 0.0795, lr_0 = 1.7953e-04
Loss = 2.1430e-03, PNorm = 154.0193, GNorm = 0.1673, lr_0 = 1.7941e-04
Loss = 4.4056e-03, PNorm = 154.0248, GNorm = 0.0908, lr_0 = 1.7928e-04
Loss = 1.2533e-03, PNorm = 154.0275, GNorm = 0.0739, lr_0 = 1.7916e-04
Loss = 1.6759e-03, PNorm = 154.0301, GNorm = 0.0434, lr_0 = 1.7904e-04
Loss = 1.7429e-03, PNorm = 154.0330, GNorm = 0.0647, lr_0 = 1.7892e-04
Loss = 2.1077e-03, PNorm = 154.0396, GNorm = 0.0663, lr_0 = 1.7879e-04
Loss = 2.3286e-03, PNorm = 154.0445, GNorm = 0.0925, lr_0 = 1.7867e-04
Loss = 1.8440e-03, PNorm = 154.0509, GNorm = 0.1508, lr_0 = 1.7855e-04
Loss = 1.9457e-03, PNorm = 154.0562, GNorm = 0.0685, lr_0 = 1.7843e-04
Loss = 2.9518e-03, PNorm = 154.0604, GNorm = 0.3106, lr_0 = 1.7830e-04
Loss = 3.5546e-03, PNorm = 154.0656, GNorm = 0.1212, lr_0 = 1.7818e-04
Loss = 1.2225e-03, PNorm = 154.0682, GNorm = 0.1350, lr_0 = 1.7806e-04
Loss = 1.5312e-03, PNorm = 154.0717, GNorm = 0.0430, lr_0 = 1.7794e-04
Loss = 1.2981e-03, PNorm = 154.0757, GNorm = 0.0755, lr_0 = 1.7782e-04
Validation mae = 0.476504
Epoch 23
Loss = 1.4229e-03, PNorm = 154.0789, GNorm = 0.0427, lr_0 = 1.7769e-04
Loss = 2.6032e-03, PNorm = 154.0800, GNorm = 0.2336, lr_0 = 1.7757e-04
Loss = 2.2504e-03, PNorm = 154.0803, GNorm = 0.0834, lr_0 = 1.7745e-04
Loss = 4.1221e-03, PNorm = 154.0837, GNorm = 0.1038, lr_0 = 1.7733e-04
Loss = 1.4103e-03, PNorm = 154.0858, GNorm = 0.0608, lr_0 = 1.7721e-04
Loss = 1.8514e-03, PNorm = 154.0873, GNorm = 0.0473, lr_0 = 1.7709e-04
Loss = 1.7073e-03, PNorm = 154.0897, GNorm = 0.0413, lr_0 = 1.7696e-04
Loss = 1.2378e-03, PNorm = 154.0921, GNorm = 0.1230, lr_0 = 1.7684e-04
Loss = 1.9180e-03, PNorm = 154.0945, GNorm = 0.0927, lr_0 = 1.7672e-04
Loss = 1.2597e-03, PNorm = 154.0956, GNorm = 0.1969, lr_0 = 1.7660e-04
Loss = 1.6843e-03, PNorm = 154.0987, GNorm = 0.0886, lr_0 = 1.7648e-04
Loss = 3.2111e-03, PNorm = 154.1023, GNorm = 0.1358, lr_0 = 1.7636e-04
Loss = 1.0717e-03, PNorm = 154.1048, GNorm = 0.0853, lr_0 = 1.7624e-04
Loss = 2.0164e-03, PNorm = 154.1065, GNorm = 0.0912, lr_0 = 1.7612e-04
Loss = 1.3198e-03, PNorm = 154.1086, GNorm = 0.0857, lr_0 = 1.7600e-04
Loss = 1.2301e-03, PNorm = 154.1121, GNorm = 0.0836, lr_0 = 1.7588e-04
Loss = 1.6239e-03, PNorm = 154.1144, GNorm = 0.0772, lr_0 = 1.7576e-04
Loss = 1.2677e-03, PNorm = 154.1182, GNorm = 0.0989, lr_0 = 1.7564e-04
Loss = 1.3267e-03, PNorm = 154.1194, GNorm = 0.2227, lr_0 = 1.7552e-04
Loss = 2.1796e-03, PNorm = 154.1204, GNorm = 0.0798, lr_0 = 1.7540e-04
Loss = 1.8084e-03, PNorm = 154.1216, GNorm = 0.1294, lr_0 = 1.7528e-04
Loss = 1.3865e-03, PNorm = 154.1260, GNorm = 0.0423, lr_0 = 1.7516e-04
Loss = 1.2076e-03, PNorm = 154.1299, GNorm = 0.1277, lr_0 = 1.7504e-04
Loss = 1.9987e-03, PNorm = 154.1320, GNorm = 0.2761, lr_0 = 1.7492e-04
Loss = 1.6367e-03, PNorm = 154.1344, GNorm = 0.1432, lr_0 = 1.7480e-04
Loss = 1.1126e-03, PNorm = 154.1372, GNorm = 0.1524, lr_0 = 1.7468e-04
Loss = 3.3250e-03, PNorm = 154.1405, GNorm = 0.1382, lr_0 = 1.7456e-04
Loss = 2.3448e-03, PNorm = 154.1445, GNorm = 0.1105, lr_0 = 1.7444e-04
Loss = 1.7565e-03, PNorm = 154.1473, GNorm = 0.1669, lr_0 = 1.7432e-04
Loss = 1.4023e-03, PNorm = 154.1489, GNorm = 0.1031, lr_0 = 1.7420e-04
Loss = 1.8798e-03, PNorm = 154.1533, GNorm = 0.1104, lr_0 = 1.7408e-04
Loss = 1.9191e-03, PNorm = 154.1559, GNorm = 0.0513, lr_0 = 1.7396e-04
Loss = 1.7087e-03, PNorm = 154.1589, GNorm = 0.1671, lr_0 = 1.7384e-04
Loss = 3.1344e-03, PNorm = 154.1617, GNorm = 0.0986, lr_0 = 1.7372e-04
Loss = 2.2594e-03, PNorm = 154.1633, GNorm = 0.0727, lr_0 = 1.7360e-04
Loss = 1.1297e-03, PNorm = 154.1652, GNorm = 0.0763, lr_0 = 1.7348e-04
Loss = 2.0815e-03, PNorm = 154.1658, GNorm = 0.3622, lr_0 = 1.7336e-04
Loss = 1.1640e-03, PNorm = 154.1709, GNorm = 0.1357, lr_0 = 1.7325e-04
Loss = 2.2552e-03, PNorm = 154.1757, GNorm = 0.3010, lr_0 = 1.7313e-04
Loss = 1.3964e-03, PNorm = 154.1803, GNorm = 0.1690, lr_0 = 1.7301e-04
Loss = 1.3903e-03, PNorm = 154.1819, GNorm = 0.0874, lr_0 = 1.7289e-04
Loss = 1.1596e-03, PNorm = 154.1849, GNorm = 0.0791, lr_0 = 1.7277e-04
Loss = 2.2043e-03, PNorm = 154.1884, GNorm = 0.1813, lr_0 = 1.7265e-04
Loss = 1.8675e-03, PNorm = 154.1928, GNorm = 0.0741, lr_0 = 1.7253e-04
Loss = 1.2892e-03, PNorm = 154.1951, GNorm = 0.1583, lr_0 = 1.7242e-04
Loss = 2.0924e-03, PNorm = 154.1972, GNorm = 0.0960, lr_0 = 1.7230e-04
Loss = 1.2118e-03, PNorm = 154.1991, GNorm = 0.1579, lr_0 = 1.7218e-04
Loss = 1.3902e-03, PNorm = 154.2015, GNorm = 0.0654, lr_0 = 1.7206e-04
Loss = 1.9707e-03, PNorm = 154.2037, GNorm = 0.0805, lr_0 = 1.7194e-04
Loss = 2.1204e-03, PNorm = 154.2066, GNorm = 0.2405, lr_0 = 1.7183e-04
Loss = 2.6636e-03, PNorm = 154.2110, GNorm = 0.0483, lr_0 = 1.7171e-04
Loss = 1.7991e-03, PNorm = 154.2134, GNorm = 0.3190, lr_0 = 1.7159e-04
Loss = 1.9706e-03, PNorm = 154.2170, GNorm = 0.0603, lr_0 = 1.7147e-04
Loss = 3.8416e-03, PNorm = 154.2191, GNorm = 0.1041, lr_0 = 1.7136e-04
Loss = 2.9460e-03, PNorm = 154.2230, GNorm = 0.2354, lr_0 = 1.7124e-04
Loss = 1.2490e-03, PNorm = 154.2250, GNorm = 0.0771, lr_0 = 1.7112e-04
Loss = 1.7528e-03, PNorm = 154.2272, GNorm = 0.1038, lr_0 = 1.7100e-04
Loss = 4.1306e-03, PNorm = 154.2312, GNorm = 0.5696, lr_0 = 1.7089e-04
Loss = 1.2784e-03, PNorm = 154.2315, GNorm = 0.0391, lr_0 = 1.7077e-04
Loss = 1.2584e-03, PNorm = 154.2339, GNorm = 0.0473, lr_0 = 1.7065e-04
Loss = 3.0271e-03, PNorm = 154.2364, GNorm = 0.1753, lr_0 = 1.7054e-04
Loss = 1.2423e-03, PNorm = 154.2417, GNorm = 0.0771, lr_0 = 1.7042e-04
Loss = 1.7168e-03, PNorm = 154.2445, GNorm = 0.2646, lr_0 = 1.7030e-04
Loss = 1.7323e-03, PNorm = 154.2463, GNorm = 0.1521, lr_0 = 1.7019e-04
Loss = 1.4848e-03, PNorm = 154.2489, GNorm = 0.0580, lr_0 = 1.7007e-04
Loss = 2.4890e-03, PNorm = 154.2505, GNorm = 0.2101, lr_0 = 1.6995e-04
Loss = 1.3475e-03, PNorm = 154.2520, GNorm = 0.0326, lr_0 = 1.6984e-04
Loss = 2.0822e-03, PNorm = 154.2551, GNorm = 0.1460, lr_0 = 1.6972e-04
Loss = 1.6314e-03, PNorm = 154.2578, GNorm = 0.0656, lr_0 = 1.6960e-04
Loss = 2.9448e-03, PNorm = 154.2579, GNorm = 0.1669, lr_0 = 1.6949e-04
Loss = 1.9205e-03, PNorm = 154.2594, GNorm = 0.1960, lr_0 = 1.6937e-04
Loss = 1.0702e-03, PNorm = 154.2628, GNorm = 0.0630, lr_0 = 1.6926e-04
Loss = 1.4056e-03, PNorm = 154.2665, GNorm = 0.1685, lr_0 = 1.6914e-04
Loss = 1.6185e-03, PNorm = 154.2695, GNorm = 0.2994, lr_0 = 1.6902e-04
Loss = 1.2420e-03, PNorm = 154.2722, GNorm = 0.2652, lr_0 = 1.6891e-04
Loss = 2.8474e-03, PNorm = 154.2766, GNorm = 0.0438, lr_0 = 1.6879e-04
Loss = 1.8507e-03, PNorm = 154.2811, GNorm = 0.2010, lr_0 = 1.6868e-04
Loss = 1.6691e-03, PNorm = 154.2832, GNorm = 0.2204, lr_0 = 1.6856e-04
Loss = 2.2749e-03, PNorm = 154.2864, GNorm = 0.3470, lr_0 = 1.6845e-04
Loss = 4.0403e-03, PNorm = 154.2893, GNorm = 0.1593, lr_0 = 1.6833e-04
Loss = 4.1445e-03, PNorm = 154.2899, GNorm = 0.1262, lr_0 = 1.6821e-04
Loss = 1.5607e-03, PNorm = 154.2937, GNorm = 0.1403, lr_0 = 1.6810e-04
Loss = 2.0856e-03, PNorm = 154.2970, GNorm = 0.1869, lr_0 = 1.6798e-04
Loss = 2.3277e-03, PNorm = 154.2973, GNorm = 0.3558, lr_0 = 1.6787e-04
Loss = 1.9729e-03, PNorm = 154.3004, GNorm = 0.1118, lr_0 = 1.6775e-04
Loss = 1.3901e-03, PNorm = 154.3021, GNorm = 0.1079, lr_0 = 1.6764e-04
Loss = 1.0820e-03, PNorm = 154.3059, GNorm = 0.0449, lr_0 = 1.6752e-04
Loss = 2.0496e-03, PNorm = 154.3083, GNorm = 0.0602, lr_0 = 1.6741e-04
Loss = 1.2483e-03, PNorm = 154.3123, GNorm = 0.1213, lr_0 = 1.6729e-04
Loss = 1.7247e-03, PNorm = 154.3164, GNorm = 0.2564, lr_0 = 1.6718e-04
Loss = 1.3577e-03, PNorm = 154.3197, GNorm = 0.1637, lr_0 = 1.6707e-04
Loss = 1.3139e-03, PNorm = 154.3217, GNorm = 0.0517, lr_0 = 1.6695e-04
Loss = 2.0650e-03, PNorm = 154.3256, GNorm = 0.2897, lr_0 = 1.6684e-04
Loss = 1.3483e-03, PNorm = 154.3300, GNorm = 0.1445, lr_0 = 1.6672e-04
Loss = 1.5879e-03, PNorm = 154.3316, GNorm = 0.1671, lr_0 = 1.6661e-04
Loss = 3.3633e-03, PNorm = 154.3358, GNorm = 0.0612, lr_0 = 1.6649e-04
Loss = 1.4049e-03, PNorm = 154.3393, GNorm = 0.1417, lr_0 = 1.6638e-04
Loss = 1.9932e-03, PNorm = 154.3423, GNorm = 0.1329, lr_0 = 1.6627e-04
Loss = 1.6529e-03, PNorm = 154.3447, GNorm = 0.1383, lr_0 = 1.6615e-04
Loss = 3.4768e-03, PNorm = 154.3464, GNorm = 0.0984, lr_0 = 1.6604e-04
Loss = 1.0831e-03, PNorm = 154.3486, GNorm = 0.1008, lr_0 = 1.6592e-04
Loss = 1.7698e-03, PNorm = 154.3522, GNorm = 0.1650, lr_0 = 1.6581e-04
Loss = 1.5154e-03, PNorm = 154.3566, GNorm = 0.1269, lr_0 = 1.6570e-04
Loss = 2.5282e-03, PNorm = 154.3629, GNorm = 0.1045, lr_0 = 1.6558e-04
Loss = 2.1765e-03, PNorm = 154.3699, GNorm = 0.0710, lr_0 = 1.6547e-04
Loss = 1.8788e-03, PNorm = 154.3755, GNorm = 0.1323, lr_0 = 1.6536e-04
Loss = 1.2134e-03, PNorm = 154.3773, GNorm = 0.0917, lr_0 = 1.6524e-04
Loss = 2.9677e-03, PNorm = 154.3789, GNorm = 0.0539, lr_0 = 1.6513e-04
Loss = 1.1752e-03, PNorm = 154.3809, GNorm = 0.1595, lr_0 = 1.6502e-04
Loss = 1.4222e-03, PNorm = 154.3861, GNorm = 0.0778, lr_0 = 1.6490e-04
Loss = 1.6599e-03, PNorm = 154.3895, GNorm = 0.0933, lr_0 = 1.6479e-04
Loss = 1.2853e-03, PNorm = 154.3916, GNorm = 0.1261, lr_0 = 1.6468e-04
Loss = 1.5139e-03, PNorm = 154.3950, GNorm = 0.0824, lr_0 = 1.6457e-04
Loss = 9.7301e-04, PNorm = 154.3984, GNorm = 0.1179, lr_0 = 1.6445e-04
Loss = 1.5635e-03, PNorm = 154.4025, GNorm = 0.1118, lr_0 = 1.6434e-04
Loss = 1.2193e-03, PNorm = 154.4055, GNorm = 0.0491, lr_0 = 1.6423e-04
Loss = 1.3486e-03, PNorm = 154.4071, GNorm = 0.1971, lr_0 = 1.6412e-04
Loss = 1.3652e-03, PNorm = 154.4103, GNorm = 0.0602, lr_0 = 1.6400e-04
Loss = 1.6550e-03, PNorm = 154.4112, GNorm = 0.0890, lr_0 = 1.6389e-04
Loss = 1.5562e-03, PNorm = 154.4133, GNorm = 0.0956, lr_0 = 1.6378e-04
Validation mae = 0.475146
Epoch 24
Loss = 2.0489e-03, PNorm = 154.4144, GNorm = 0.0917, lr_0 = 1.6367e-04
Loss = 9.2868e-04, PNorm = 154.4148, GNorm = 0.0311, lr_0 = 1.6355e-04
Loss = 1.6827e-03, PNorm = 154.4156, GNorm = 0.1513, lr_0 = 1.6344e-04
Loss = 1.2249e-03, PNorm = 154.4193, GNorm = 0.1100, lr_0 = 1.6333e-04
Loss = 1.0928e-03, PNorm = 154.4217, GNorm = 0.0724, lr_0 = 1.6322e-04
Loss = 1.8483e-03, PNorm = 154.4222, GNorm = 0.2645, lr_0 = 1.6311e-04
Loss = 1.3294e-03, PNorm = 154.4256, GNorm = 0.1413, lr_0 = 1.6299e-04
Loss = 1.6066e-03, PNorm = 154.4283, GNorm = 0.1348, lr_0 = 1.6288e-04
Loss = 9.9325e-04, PNorm = 154.4308, GNorm = 0.0335, lr_0 = 1.6277e-04
Loss = 1.0208e-03, PNorm = 154.4310, GNorm = 0.0920, lr_0 = 1.6266e-04
Loss = 1.3978e-03, PNorm = 154.4307, GNorm = 0.1034, lr_0 = 1.6255e-04
Loss = 1.6221e-03, PNorm = 154.4326, GNorm = 0.1895, lr_0 = 1.6244e-04
Loss = 1.6289e-03, PNorm = 154.4327, GNorm = 0.0981, lr_0 = 1.6233e-04
Loss = 1.0439e-03, PNorm = 154.4336, GNorm = 0.1001, lr_0 = 1.6221e-04
Loss = 1.1777e-03, PNorm = 154.4344, GNorm = 0.1443, lr_0 = 1.6210e-04
Loss = 9.7568e-04, PNorm = 154.4368, GNorm = 0.3029, lr_0 = 1.6199e-04
Loss = 1.0182e-03, PNorm = 154.4395, GNorm = 0.2959, lr_0 = 1.6188e-04
Loss = 1.6061e-03, PNorm = 154.4423, GNorm = 0.1885, lr_0 = 1.6177e-04
Loss = 1.4000e-03, PNorm = 154.4449, GNorm = 0.0749, lr_0 = 1.6166e-04
Loss = 1.2917e-03, PNorm = 154.4477, GNorm = 0.2007, lr_0 = 1.6155e-04
Loss = 1.2357e-03, PNorm = 154.4513, GNorm = 0.1858, lr_0 = 1.6144e-04
Loss = 1.7419e-03, PNorm = 154.4539, GNorm = 0.1042, lr_0 = 1.6133e-04
Loss = 1.5998e-03, PNorm = 154.4572, GNorm = 0.0735, lr_0 = 1.6122e-04
Loss = 1.0185e-03, PNorm = 154.4583, GNorm = 0.1497, lr_0 = 1.6111e-04
Loss = 1.1010e-03, PNorm = 154.4595, GNorm = 0.1364, lr_0 = 1.6100e-04
Loss = 9.8441e-04, PNorm = 154.4601, GNorm = 0.1025, lr_0 = 1.6089e-04
Loss = 2.1287e-03, PNorm = 154.4628, GNorm = 0.1289, lr_0 = 1.6078e-04
Loss = 9.0809e-04, PNorm = 154.4655, GNorm = 0.0463, lr_0 = 1.6067e-04
Loss = 1.0605e-03, PNorm = 154.4677, GNorm = 0.0916, lr_0 = 1.6056e-04
Loss = 1.1333e-03, PNorm = 154.4697, GNorm = 0.2746, lr_0 = 1.6045e-04
Loss = 1.5465e-03, PNorm = 154.4725, GNorm = 0.1516, lr_0 = 1.6034e-04
Loss = 1.3555e-03, PNorm = 154.4742, GNorm = 0.0965, lr_0 = 1.6023e-04
Loss = 1.3452e-03, PNorm = 154.4762, GNorm = 0.0221, lr_0 = 1.6012e-04
Loss = 1.3666e-03, PNorm = 154.4791, GNorm = 0.0791, lr_0 = 1.6001e-04
Loss = 2.0096e-03, PNorm = 154.4796, GNorm = 0.0865, lr_0 = 1.5990e-04
Loss = 1.2289e-03, PNorm = 154.4827, GNorm = 0.0300, lr_0 = 1.5979e-04
Loss = 1.1587e-03, PNorm = 154.4875, GNorm = 0.1025, lr_0 = 1.5968e-04
Loss = 1.0455e-03, PNorm = 154.4906, GNorm = 0.0568, lr_0 = 1.5957e-04
Loss = 9.2217e-04, PNorm = 154.4932, GNorm = 0.2326, lr_0 = 1.5946e-04
Loss = 1.1928e-03, PNorm = 154.4942, GNorm = 0.1885, lr_0 = 1.5935e-04
Loss = 1.1035e-03, PNorm = 154.4945, GNorm = 0.1483, lr_0 = 1.5924e-04
Loss = 9.4181e-04, PNorm = 154.4958, GNorm = 0.1190, lr_0 = 1.5913e-04
Loss = 1.0625e-03, PNorm = 154.4995, GNorm = 0.0371, lr_0 = 1.5902e-04
Loss = 1.1564e-03, PNorm = 154.5031, GNorm = 0.0889, lr_0 = 1.5891e-04
Loss = 3.0443e-03, PNorm = 154.5063, GNorm = 0.0690, lr_0 = 1.5880e-04
Loss = 2.2932e-03, PNorm = 154.5087, GNorm = 0.1184, lr_0 = 1.5870e-04
Loss = 1.0260e-03, PNorm = 154.5120, GNorm = 0.0742, lr_0 = 1.5859e-04
Loss = 1.4214e-03, PNorm = 154.5152, GNorm = 0.0592, lr_0 = 1.5848e-04
Loss = 3.1841e-03, PNorm = 154.5188, GNorm = 0.1364, lr_0 = 1.5837e-04
Loss = 1.4552e-03, PNorm = 154.5203, GNorm = 0.0964, lr_0 = 1.5826e-04
Loss = 1.8769e-03, PNorm = 154.5217, GNorm = 0.2040, lr_0 = 1.5815e-04
Loss = 1.0365e-03, PNorm = 154.5229, GNorm = 0.1295, lr_0 = 1.5804e-04
Loss = 1.6209e-03, PNorm = 154.5249, GNorm = 0.1081, lr_0 = 1.5794e-04
Loss = 1.3511e-03, PNorm = 154.5263, GNorm = 0.0326, lr_0 = 1.5783e-04
Loss = 1.3245e-03, PNorm = 154.5280, GNorm = 0.0847, lr_0 = 1.5772e-04
Loss = 1.8625e-03, PNorm = 154.5301, GNorm = 0.0834, lr_0 = 1.5761e-04
Loss = 1.6450e-03, PNorm = 154.5322, GNorm = 0.1173, lr_0 = 1.5750e-04
Loss = 1.1298e-03, PNorm = 154.5347, GNorm = 0.0359, lr_0 = 1.5740e-04
Loss = 1.0278e-03, PNorm = 154.5376, GNorm = 0.1628, lr_0 = 1.5729e-04
Loss = 1.2853e-03, PNorm = 154.5386, GNorm = 0.0915, lr_0 = 1.5718e-04
Loss = 1.4932e-03, PNorm = 154.5397, GNorm = 0.1098, lr_0 = 1.5707e-04
Loss = 2.2034e-03, PNorm = 154.5417, GNorm = 0.0919, lr_0 = 1.5697e-04
Loss = 2.5561e-03, PNorm = 154.5439, GNorm = 0.1647, lr_0 = 1.5686e-04
Loss = 2.1982e-03, PNorm = 154.5461, GNorm = 0.0548, lr_0 = 1.5675e-04
Loss = 1.1529e-03, PNorm = 154.5490, GNorm = 0.0486, lr_0 = 1.5664e-04
Loss = 8.4513e-04, PNorm = 154.5506, GNorm = 0.0443, lr_0 = 1.5654e-04
Loss = 1.1772e-03, PNorm = 154.5539, GNorm = 0.0401, lr_0 = 1.5643e-04
Loss = 1.2030e-03, PNorm = 154.5563, GNorm = 0.1408, lr_0 = 1.5632e-04
Loss = 8.9830e-04, PNorm = 154.5579, GNorm = 0.1576, lr_0 = 1.5621e-04
Loss = 1.5550e-03, PNorm = 154.5605, GNorm = 0.0967, lr_0 = 1.5611e-04
Loss = 3.0802e-03, PNorm = 154.5616, GNorm = 0.1107, lr_0 = 1.5600e-04
Loss = 9.1120e-04, PNorm = 154.5606, GNorm = 0.0513, lr_0 = 1.5589e-04
Loss = 2.5503e-03, PNorm = 154.5620, GNorm = 0.0956, lr_0 = 1.5579e-04
Loss = 1.7565e-03, PNorm = 154.5663, GNorm = 0.0999, lr_0 = 1.5568e-04
Loss = 1.6335e-03, PNorm = 154.5707, GNorm = 0.0671, lr_0 = 1.5557e-04
Loss = 8.0342e-04, PNorm = 154.5731, GNorm = 0.1003, lr_0 = 1.5547e-04
Loss = 8.9981e-04, PNorm = 154.5753, GNorm = 0.0328, lr_0 = 1.5536e-04
Loss = 2.7707e-03, PNorm = 154.5779, GNorm = 0.1538, lr_0 = 1.5525e-04
Loss = 1.4793e-03, PNorm = 154.5816, GNorm = 0.1451, lr_0 = 1.5515e-04
Loss = 1.3122e-03, PNorm = 154.5875, GNorm = 0.0606, lr_0 = 1.5504e-04
Loss = 1.0831e-03, PNorm = 154.5899, GNorm = 0.0552, lr_0 = 1.5493e-04
Loss = 2.8809e-03, PNorm = 154.5926, GNorm = 0.1180, lr_0 = 1.5483e-04
Loss = 1.4241e-03, PNorm = 154.5935, GNorm = 0.1085, lr_0 = 1.5472e-04
Loss = 1.2119e-03, PNorm = 154.5945, GNorm = 0.0888, lr_0 = 1.5462e-04
Loss = 2.2311e-03, PNorm = 154.5965, GNorm = 0.1218, lr_0 = 1.5451e-04
Loss = 2.5076e-03, PNorm = 154.6000, GNorm = 0.1951, lr_0 = 1.5440e-04
Loss = 3.2547e-03, PNorm = 154.6020, GNorm = 0.2018, lr_0 = 1.5430e-04
Loss = 1.0935e-03, PNorm = 154.6028, GNorm = 0.0690, lr_0 = 1.5419e-04
Loss = 1.8986e-03, PNorm = 154.6056, GNorm = 0.2315, lr_0 = 1.5409e-04
Loss = 3.0345e-03, PNorm = 154.6111, GNorm = 0.0338, lr_0 = 1.5398e-04
Loss = 2.4422e-03, PNorm = 154.6149, GNorm = 0.1799, lr_0 = 1.5388e-04
Loss = 1.4782e-03, PNorm = 154.6155, GNorm = 0.1272, lr_0 = 1.5377e-04
Loss = 4.3872e-03, PNorm = 154.6174, GNorm = 0.2597, lr_0 = 1.5367e-04
Loss = 3.3995e-03, PNorm = 154.6216, GNorm = 0.1929, lr_0 = 1.5356e-04
Loss = 1.0984e-03, PNorm = 154.6266, GNorm = 0.1359, lr_0 = 1.5346e-04
Loss = 2.6464e-03, PNorm = 154.6293, GNorm = 0.1587, lr_0 = 1.5335e-04
Loss = 2.0938e-03, PNorm = 154.6311, GNorm = 0.0583, lr_0 = 1.5325e-04
Loss = 1.5135e-03, PNorm = 154.6368, GNorm = 0.1797, lr_0 = 1.5314e-04
Loss = 1.9784e-03, PNorm = 154.6392, GNorm = 0.0761, lr_0 = 1.5304e-04
Loss = 1.2971e-03, PNorm = 154.6425, GNorm = 0.1327, lr_0 = 1.5293e-04
Loss = 2.3027e-03, PNorm = 154.6467, GNorm = 0.0570, lr_0 = 1.5283e-04
Loss = 1.0407e-03, PNorm = 154.6498, GNorm = 0.0621, lr_0 = 1.5272e-04
Loss = 1.1815e-03, PNorm = 154.6512, GNorm = 0.0984, lr_0 = 1.5262e-04
Loss = 2.5205e-03, PNorm = 154.6516, GNorm = 0.3312, lr_0 = 1.5251e-04
Loss = 6.1528e-03, PNorm = 154.6532, GNorm = 0.1715, lr_0 = 1.5241e-04
Loss = 2.3006e-03, PNorm = 154.6542, GNorm = 0.1157, lr_0 = 1.5230e-04
Loss = 1.5931e-03, PNorm = 154.6558, GNorm = 0.1183, lr_0 = 1.5220e-04
Loss = 1.8282e-03, PNorm = 154.6568, GNorm = 0.1130, lr_0 = 1.5209e-04
Loss = 2.7890e-03, PNorm = 154.6572, GNorm = 0.1615, lr_0 = 1.5199e-04
Loss = 1.6798e-03, PNorm = 154.6582, GNorm = 0.2056, lr_0 = 1.5189e-04
Loss = 2.0285e-03, PNorm = 154.6578, GNorm = 0.0847, lr_0 = 1.5178e-04
Loss = 1.2459e-03, PNorm = 154.6602, GNorm = 0.1380, lr_0 = 1.5168e-04
Loss = 1.0113e-03, PNorm = 154.6633, GNorm = 0.0592, lr_0 = 1.5157e-04
Loss = 1.3958e-03, PNorm = 154.6669, GNorm = 0.1183, lr_0 = 1.5147e-04
Loss = 1.2236e-03, PNorm = 154.6685, GNorm = 0.0597, lr_0 = 1.5137e-04
Loss = 1.0390e-03, PNorm = 154.6709, GNorm = 0.0546, lr_0 = 1.5126e-04
Loss = 1.9288e-03, PNorm = 154.6734, GNorm = 0.0562, lr_0 = 1.5116e-04
Loss = 1.5589e-03, PNorm = 154.6761, GNorm = 0.2119, lr_0 = 1.5106e-04
Loss = 2.3007e-03, PNorm = 154.6781, GNorm = 0.1442, lr_0 = 1.5095e-04
Loss = 4.6694e-03, PNorm = 154.6785, GNorm = 0.2640, lr_0 = 1.5085e-04
Validation mae = 0.476899
Epoch 25
Loss = 1.3899e-03, PNorm = 154.6827, GNorm = 0.1442, lr_0 = 1.5075e-04
Loss = 1.0151e-03, PNorm = 154.6839, GNorm = 0.0595, lr_0 = 1.5064e-04
Loss = 8.6988e-04, PNorm = 154.6859, GNorm = 0.1400, lr_0 = 1.5054e-04
Loss = 2.2729e-03, PNorm = 154.6877, GNorm = 0.2017, lr_0 = 1.5044e-04
Loss = 1.4144e-03, PNorm = 154.6912, GNorm = 0.1724, lr_0 = 1.5033e-04
Loss = 9.7450e-04, PNorm = 154.6950, GNorm = 0.1048, lr_0 = 1.5023e-04
Loss = 1.3119e-03, PNorm = 154.6978, GNorm = 0.1048, lr_0 = 1.5013e-04
Loss = 1.3710e-03, PNorm = 154.7003, GNorm = 0.1343, lr_0 = 1.5002e-04
Loss = 9.2212e-04, PNorm = 154.7024, GNorm = 0.0559, lr_0 = 1.4992e-04
Loss = 8.3321e-04, PNorm = 154.7043, GNorm = 0.0910, lr_0 = 1.4982e-04
Loss = 1.2483e-03, PNorm = 154.7059, GNorm = 0.0414, lr_0 = 1.4972e-04
Loss = 1.2596e-03, PNorm = 154.7082, GNorm = 0.1158, lr_0 = 1.4961e-04
Loss = 2.1479e-03, PNorm = 154.7106, GNorm = 0.0211, lr_0 = 1.4951e-04
Loss = 8.6269e-04, PNorm = 154.7117, GNorm = 0.0448, lr_0 = 1.4941e-04
Loss = 1.9884e-03, PNorm = 154.7125, GNorm = 0.0823, lr_0 = 1.4931e-04
Loss = 9.2325e-04, PNorm = 154.7147, GNorm = 0.0593, lr_0 = 1.4920e-04
Loss = 1.3198e-03, PNorm = 154.7176, GNorm = 0.0661, lr_0 = 1.4910e-04
Loss = 2.1869e-03, PNorm = 154.7194, GNorm = 0.0615, lr_0 = 1.4900e-04
Loss = 1.5286e-03, PNorm = 154.7213, GNorm = 0.1026, lr_0 = 1.4890e-04
Loss = 1.2629e-03, PNorm = 154.7245, GNorm = 0.1141, lr_0 = 1.4880e-04
Loss = 8.4004e-04, PNorm = 154.7269, GNorm = 0.0922, lr_0 = 1.4869e-04
Loss = 1.0926e-03, PNorm = 154.7291, GNorm = 0.0359, lr_0 = 1.4859e-04
Loss = 2.7221e-03, PNorm = 154.7307, GNorm = 0.2127, lr_0 = 1.4849e-04
Loss = 2.3659e-03, PNorm = 154.7302, GNorm = 0.1750, lr_0 = 1.4839e-04
Loss = 1.8787e-03, PNorm = 154.7315, GNorm = 0.0311, lr_0 = 1.4829e-04
Loss = 1.1735e-03, PNorm = 154.7328, GNorm = 0.0557, lr_0 = 1.4818e-04
Loss = 1.2565e-03, PNorm = 154.7355, GNorm = 0.0745, lr_0 = 1.4808e-04
Loss = 1.9475e-03, PNorm = 154.7385, GNorm = 0.1548, lr_0 = 1.4798e-04
Loss = 8.6670e-04, PNorm = 154.7419, GNorm = 0.1059, lr_0 = 1.4788e-04
Loss = 3.9121e-03, PNorm = 154.7448, GNorm = 0.2298, lr_0 = 1.4778e-04
Loss = 1.0583e-03, PNorm = 154.7473, GNorm = 0.1479, lr_0 = 1.4768e-04
Loss = 1.1614e-03, PNorm = 154.7495, GNorm = 0.1275, lr_0 = 1.4758e-04
Loss = 1.0917e-03, PNorm = 154.7511, GNorm = 0.1681, lr_0 = 1.4748e-04
Loss = 1.2843e-03, PNorm = 154.7537, GNorm = 0.0335, lr_0 = 1.4737e-04
Loss = 9.3098e-04, PNorm = 154.7537, GNorm = 0.0830, lr_0 = 1.4727e-04
Loss = 1.0285e-03, PNorm = 154.7540, GNorm = 0.0446, lr_0 = 1.4717e-04
Loss = 7.8343e-04, PNorm = 154.7547, GNorm = 0.1372, lr_0 = 1.4707e-04
Loss = 2.0842e-03, PNorm = 154.7566, GNorm = 0.0861, lr_0 = 1.4697e-04
Loss = 8.7701e-04, PNorm = 154.7594, GNorm = 0.0654, lr_0 = 1.4687e-04
Loss = 1.2857e-03, PNorm = 154.7628, GNorm = 0.1237, lr_0 = 1.4677e-04
Loss = 1.1410e-03, PNorm = 154.7646, GNorm = 0.0391, lr_0 = 1.4667e-04
Loss = 3.4144e-03, PNorm = 154.7655, GNorm = 0.4746, lr_0 = 1.4657e-04
Loss = 9.4304e-04, PNorm = 154.7662, GNorm = 0.1883, lr_0 = 1.4647e-04
Loss = 2.2886e-03, PNorm = 154.7676, GNorm = 0.1287, lr_0 = 1.4637e-04
Loss = 1.6261e-03, PNorm = 154.7684, GNorm = 0.0721, lr_0 = 1.4627e-04
Loss = 1.2557e-03, PNorm = 154.7701, GNorm = 0.1359, lr_0 = 1.4617e-04
Loss = 1.2160e-03, PNorm = 154.7737, GNorm = 0.1381, lr_0 = 1.4607e-04
Loss = 1.0041e-03, PNorm = 154.7769, GNorm = 0.0670, lr_0 = 1.4597e-04
Loss = 1.3084e-03, PNorm = 154.7792, GNorm = 0.0738, lr_0 = 1.4587e-04
Loss = 2.7143e-03, PNorm = 154.7808, GNorm = 0.5799, lr_0 = 1.4577e-04
Loss = 8.6684e-04, PNorm = 154.7813, GNorm = 0.0441, lr_0 = 1.4567e-04
Loss = 9.9814e-04, PNorm = 154.7834, GNorm = 0.1436, lr_0 = 1.4557e-04
Loss = 1.4179e-03, PNorm = 154.7852, GNorm = 0.1065, lr_0 = 1.4547e-04
Loss = 9.6951e-04, PNorm = 154.7865, GNorm = 0.1259, lr_0 = 1.4537e-04
Loss = 1.2438e-03, PNorm = 154.7877, GNorm = 0.1012, lr_0 = 1.4527e-04
Loss = 1.9327e-03, PNorm = 154.7899, GNorm = 0.0592, lr_0 = 1.4517e-04
Loss = 9.7754e-04, PNorm = 154.7933, GNorm = 0.0543, lr_0 = 1.4507e-04
Loss = 8.4047e-04, PNorm = 154.7971, GNorm = 0.1316, lr_0 = 1.4497e-04
Loss = 1.8985e-03, PNorm = 154.7986, GNorm = 0.0386, lr_0 = 1.4487e-04
Loss = 1.0933e-03, PNorm = 154.8002, GNorm = 0.1249, lr_0 = 1.4477e-04
Loss = 1.2106e-03, PNorm = 154.8039, GNorm = 0.1124, lr_0 = 1.4467e-04
Loss = 8.7581e-04, PNorm = 154.8068, GNorm = 0.0897, lr_0 = 1.4457e-04
Loss = 1.5778e-03, PNorm = 154.8068, GNorm = 0.0295, lr_0 = 1.4447e-04
Loss = 1.7991e-03, PNorm = 154.8080, GNorm = 0.1341, lr_0 = 1.4438e-04
Loss = 8.9883e-04, PNorm = 154.8093, GNorm = 0.1041, lr_0 = 1.4428e-04
Loss = 1.5043e-03, PNorm = 154.8129, GNorm = 0.0346, lr_0 = 1.4418e-04
Loss = 9.4863e-04, PNorm = 154.8178, GNorm = 0.0228, lr_0 = 1.4408e-04
Loss = 1.3942e-03, PNorm = 154.8220, GNorm = 0.1258, lr_0 = 1.4398e-04
Loss = 1.1335e-03, PNorm = 154.8244, GNorm = 0.1465, lr_0 = 1.4388e-04
Loss = 1.3676e-03, PNorm = 154.8257, GNorm = 0.0668, lr_0 = 1.4378e-04
Loss = 3.4076e-03, PNorm = 154.8293, GNorm = 0.1649, lr_0 = 1.4368e-04
Loss = 1.1575e-03, PNorm = 154.8296, GNorm = 0.0327, lr_0 = 1.4359e-04
Loss = 2.9293e-03, PNorm = 154.8310, GNorm = 0.1469, lr_0 = 1.4349e-04
Loss = 1.2039e-03, PNorm = 154.8343, GNorm = 0.1224, lr_0 = 1.4339e-04
Loss = 1.3832e-03, PNorm = 154.8354, GNorm = 0.0371, lr_0 = 1.4329e-04
Loss = 9.9985e-04, PNorm = 154.8378, GNorm = 0.0429, lr_0 = 1.4319e-04
Loss = 2.0887e-03, PNorm = 154.8409, GNorm = 0.0658, lr_0 = 1.4310e-04
Loss = 9.9760e-04, PNorm = 154.8439, GNorm = 0.0532, lr_0 = 1.4300e-04
Loss = 8.5847e-04, PNorm = 154.8466, GNorm = 0.1896, lr_0 = 1.4290e-04
Loss = 1.5266e-03, PNorm = 154.8471, GNorm = 0.1156, lr_0 = 1.4280e-04
Loss = 2.0137e-03, PNorm = 154.8493, GNorm = 0.0395, lr_0 = 1.4270e-04
Loss = 4.1580e-03, PNorm = 154.8534, GNorm = 0.0595, lr_0 = 1.4261e-04
Loss = 1.1392e-03, PNorm = 154.8581, GNorm = 0.0631, lr_0 = 1.4251e-04
Loss = 8.5796e-04, PNorm = 154.8610, GNorm = 0.0646, lr_0 = 1.4241e-04
Loss = 4.5988e-03, PNorm = 154.8621, GNorm = 0.1879, lr_0 = 1.4231e-04
Loss = 1.0379e-03, PNorm = 154.8647, GNorm = 0.1583, lr_0 = 1.4222e-04
Loss = 1.4209e-03, PNorm = 154.8674, GNorm = 0.1944, lr_0 = 1.4212e-04
Loss = 3.1860e-03, PNorm = 154.8723, GNorm = 0.1050, lr_0 = 1.4202e-04
Loss = 1.1156e-03, PNorm = 154.8754, GNorm = 0.0506, lr_0 = 1.4192e-04
Loss = 1.3452e-03, PNorm = 154.8765, GNorm = 0.0375, lr_0 = 1.4183e-04
Loss = 1.4053e-03, PNorm = 154.8781, GNorm = 0.1289, lr_0 = 1.4173e-04
Loss = 9.5440e-04, PNorm = 154.8807, GNorm = 0.1350, lr_0 = 1.4163e-04
Loss = 2.7048e-03, PNorm = 154.8820, GNorm = 0.1463, lr_0 = 1.4153e-04
Loss = 1.9111e-03, PNorm = 154.8811, GNorm = 0.2282, lr_0 = 1.4144e-04
Loss = 2.2214e-03, PNorm = 154.8812, GNorm = 0.2447, lr_0 = 1.4134e-04
Loss = 2.0932e-03, PNorm = 154.8817, GNorm = 0.1138, lr_0 = 1.4124e-04
Loss = 1.2603e-03, PNorm = 154.8863, GNorm = 0.0623, lr_0 = 1.4115e-04
Loss = 9.7357e-04, PNorm = 154.8897, GNorm = 0.0973, lr_0 = 1.4105e-04
Loss = 8.9471e-04, PNorm = 154.8913, GNorm = 0.1295, lr_0 = 1.4095e-04
Loss = 1.4836e-03, PNorm = 154.8930, GNorm = 0.0720, lr_0 = 1.4086e-04
Loss = 1.0478e-03, PNorm = 154.8962, GNorm = 0.1045, lr_0 = 1.4076e-04
Loss = 9.4266e-04, PNorm = 154.8987, GNorm = 0.0716, lr_0 = 1.4066e-04
Loss = 1.7053e-03, PNorm = 154.9011, GNorm = 0.1483, lr_0 = 1.4057e-04
Loss = 2.9502e-03, PNorm = 154.9046, GNorm = 0.0691, lr_0 = 1.4047e-04
Loss = 7.6165e-04, PNorm = 154.9064, GNorm = 0.1558, lr_0 = 1.4038e-04
Loss = 1.0957e-03, PNorm = 154.9095, GNorm = 0.1064, lr_0 = 1.4028e-04
Loss = 1.7274e-03, PNorm = 154.9129, GNorm = 0.0413, lr_0 = 1.4018e-04
Loss = 1.2332e-03, PNorm = 154.9157, GNorm = 0.0865, lr_0 = 1.4009e-04
Loss = 2.2293e-03, PNorm = 154.9192, GNorm = 0.1000, lr_0 = 1.3999e-04
Loss = 1.5845e-03, PNorm = 154.9206, GNorm = 0.0331, lr_0 = 1.3990e-04
Loss = 1.3286e-03, PNorm = 154.9226, GNorm = 0.0877, lr_0 = 1.3980e-04
Loss = 1.8914e-03, PNorm = 154.9238, GNorm = 0.0974, lr_0 = 1.3970e-04
Loss = 1.0352e-03, PNorm = 154.9272, GNorm = 0.0771, lr_0 = 1.3961e-04
Loss = 1.4644e-03, PNorm = 154.9286, GNorm = 0.1784, lr_0 = 1.3951e-04
Loss = 1.3531e-03, PNorm = 154.9311, GNorm = 0.0478, lr_0 = 1.3942e-04
Loss = 2.6764e-03, PNorm = 154.9331, GNorm = 0.0590, lr_0 = 1.3932e-04
Loss = 1.2740e-03, PNorm = 154.9356, GNorm = 0.1342, lr_0 = 1.3923e-04
Loss = 9.1844e-04, PNorm = 154.9378, GNorm = 0.1120, lr_0 = 1.3913e-04
Loss = 2.1720e-03, PNorm = 154.9401, GNorm = 0.0716, lr_0 = 1.3904e-04
Loss = 2.6547e-03, PNorm = 154.9425, GNorm = 0.0516, lr_0 = 1.3894e-04
Validation mae = 0.475572
Epoch 26
Loss = 1.6075e-03, PNorm = 154.9439, GNorm = 0.0793, lr_0 = 1.3884e-04
Loss = 1.4219e-03, PNorm = 154.9449, GNorm = 0.1279, lr_0 = 1.3875e-04
Loss = 3.3661e-03, PNorm = 154.9439, GNorm = 0.0483, lr_0 = 1.3865e-04
Loss = 2.4633e-03, PNorm = 154.9466, GNorm = 0.0786, lr_0 = 1.3856e-04
Loss = 2.5013e-03, PNorm = 154.9485, GNorm = 0.1498, lr_0 = 1.3846e-04
Loss = 9.9393e-04, PNorm = 154.9493, GNorm = 0.0348, lr_0 = 1.3837e-04
Loss = 2.6617e-03, PNorm = 154.9494, GNorm = 0.1128, lr_0 = 1.3828e-04
Loss = 1.0935e-03, PNorm = 154.9507, GNorm = 0.0657, lr_0 = 1.3818e-04
Loss = 1.6096e-03, PNorm = 154.9505, GNorm = 0.0570, lr_0 = 1.3809e-04
Loss = 8.4305e-04, PNorm = 154.9512, GNorm = 0.0635, lr_0 = 1.3799e-04
Loss = 2.1592e-03, PNorm = 154.9520, GNorm = 0.0616, lr_0 = 1.3790e-04
Loss = 2.4243e-03, PNorm = 154.9533, GNorm = 0.0747, lr_0 = 1.3780e-04
Loss = 1.1104e-03, PNorm = 154.9543, GNorm = 0.0427, lr_0 = 1.3771e-04
Loss = 7.7263e-04, PNorm = 154.9568, GNorm = 0.0724, lr_0 = 1.3761e-04
Loss = 9.6852e-04, PNorm = 154.9588, GNorm = 0.0607, lr_0 = 1.3752e-04
Loss = 1.1839e-03, PNorm = 154.9604, GNorm = 0.1059, lr_0 = 1.3742e-04
Loss = 8.8984e-04, PNorm = 154.9623, GNorm = 0.0509, lr_0 = 1.3733e-04
Loss = 1.0973e-03, PNorm = 154.9653, GNorm = 0.0680, lr_0 = 1.3724e-04
Loss = 1.7605e-03, PNorm = 154.9671, GNorm = 0.0696, lr_0 = 1.3714e-04
Loss = 8.1552e-04, PNorm = 154.9680, GNorm = 0.0728, lr_0 = 1.3705e-04
Loss = 8.5426e-04, PNorm = 154.9696, GNorm = 0.0651, lr_0 = 1.3695e-04
Loss = 1.1172e-03, PNorm = 154.9724, GNorm = 0.0998, lr_0 = 1.3686e-04
Loss = 6.3902e-04, PNorm = 154.9750, GNorm = 0.0819, lr_0 = 1.3677e-04
Loss = 7.3857e-04, PNorm = 154.9767, GNorm = 0.1118, lr_0 = 1.3667e-04
Loss = 1.1060e-03, PNorm = 154.9779, GNorm = 0.1021, lr_0 = 1.3658e-04
Loss = 1.3301e-03, PNorm = 154.9784, GNorm = 0.0236, lr_0 = 1.3649e-04
Loss = 9.3803e-04, PNorm = 154.9787, GNorm = 0.0759, lr_0 = 1.3639e-04
Loss = 2.2081e-03, PNorm = 154.9807, GNorm = 0.0657, lr_0 = 1.3630e-04
Loss = 8.9115e-04, PNorm = 154.9834, GNorm = 0.1022, lr_0 = 1.3621e-04
Loss = 1.1473e-03, PNorm = 154.9844, GNorm = 0.0462, lr_0 = 1.3611e-04
Loss = 8.0098e-04, PNorm = 154.9864, GNorm = 0.0661, lr_0 = 1.3602e-04
Loss = 7.6844e-04, PNorm = 154.9879, GNorm = 0.0626, lr_0 = 1.3593e-04
Loss = 9.8400e-04, PNorm = 154.9904, GNorm = 0.2025, lr_0 = 1.3583e-04
Loss = 6.6652e-04, PNorm = 154.9929, GNorm = 0.1430, lr_0 = 1.3574e-04
Loss = 8.9820e-04, PNorm = 154.9952, GNorm = 0.1092, lr_0 = 1.3565e-04
Loss = 8.0580e-04, PNorm = 154.9969, GNorm = 0.0415, lr_0 = 1.3555e-04
Loss = 1.4350e-03, PNorm = 154.9978, GNorm = 0.0641, lr_0 = 1.3546e-04
Loss = 1.5710e-03, PNorm = 154.9988, GNorm = 0.1061, lr_0 = 1.3537e-04
Loss = 9.8437e-04, PNorm = 155.0010, GNorm = 0.0876, lr_0 = 1.3528e-04
Loss = 6.8851e-04, PNorm = 155.0028, GNorm = 0.0746, lr_0 = 1.3518e-04
Loss = 8.8611e-04, PNorm = 155.0046, GNorm = 0.1703, lr_0 = 1.3509e-04
Loss = 2.8697e-03, PNorm = 155.0062, GNorm = 0.2591, lr_0 = 1.3500e-04
Loss = 1.1384e-03, PNorm = 155.0065, GNorm = 0.0671, lr_0 = 1.3491e-04
Loss = 6.7761e-04, PNorm = 155.0066, GNorm = 0.1317, lr_0 = 1.3481e-04
Loss = 6.3938e-04, PNorm = 155.0074, GNorm = 0.0435, lr_0 = 1.3472e-04
Loss = 1.2912e-03, PNorm = 155.0090, GNorm = 0.2523, lr_0 = 1.3463e-04
Loss = 1.7227e-03, PNorm = 155.0108, GNorm = 0.0379, lr_0 = 1.3454e-04
Loss = 2.6756e-03, PNorm = 155.0120, GNorm = 0.1246, lr_0 = 1.3444e-04
Loss = 2.8203e-03, PNorm = 155.0141, GNorm = 0.1004, lr_0 = 1.3435e-04
Loss = 8.8272e-04, PNorm = 155.0148, GNorm = 0.1140, lr_0 = 1.3426e-04
Loss = 9.1568e-04, PNorm = 155.0160, GNorm = 0.0806, lr_0 = 1.3417e-04
Loss = 6.4877e-04, PNorm = 155.0178, GNorm = 0.0878, lr_0 = 1.3408e-04
Loss = 1.0604e-03, PNorm = 155.0195, GNorm = 0.1226, lr_0 = 1.3398e-04
Loss = 6.6485e-04, PNorm = 155.0212, GNorm = 0.0995, lr_0 = 1.3389e-04
Loss = 1.6117e-03, PNorm = 155.0241, GNorm = 0.1547, lr_0 = 1.3380e-04
Loss = 1.2023e-03, PNorm = 155.0246, GNorm = 0.0631, lr_0 = 1.3371e-04
Loss = 7.1561e-04, PNorm = 155.0281, GNorm = 0.0282, lr_0 = 1.3362e-04
Loss = 2.2230e-03, PNorm = 155.0305, GNorm = 0.1566, lr_0 = 1.3353e-04
Loss = 1.1841e-03, PNorm = 155.0328, GNorm = 0.2595, lr_0 = 1.3343e-04
Loss = 8.9181e-04, PNorm = 155.0337, GNorm = 0.0596, lr_0 = 1.3334e-04
Loss = 1.3307e-03, PNorm = 155.0366, GNorm = 0.0922, lr_0 = 1.3325e-04
Loss = 1.1160e-03, PNorm = 155.0395, GNorm = 0.1089, lr_0 = 1.3316e-04
Loss = 2.0519e-03, PNorm = 155.0415, GNorm = 0.1521, lr_0 = 1.3307e-04
Loss = 1.7619e-03, PNorm = 155.0412, GNorm = 0.1485, lr_0 = 1.3298e-04
Loss = 1.1808e-03, PNorm = 155.0436, GNorm = 0.0693, lr_0 = 1.3289e-04
Loss = 1.0008e-03, PNorm = 155.0458, GNorm = 0.0792, lr_0 = 1.3280e-04
Loss = 6.8679e-04, PNorm = 155.0485, GNorm = 0.0328, lr_0 = 1.3270e-04
Loss = 6.9564e-04, PNorm = 155.0509, GNorm = 0.0425, lr_0 = 1.3261e-04
Loss = 1.8004e-03, PNorm = 155.0530, GNorm = 0.0595, lr_0 = 1.3252e-04
Loss = 4.4157e-03, PNorm = 155.0545, GNorm = 0.0466, lr_0 = 1.3243e-04
Loss = 1.9863e-03, PNorm = 155.0562, GNorm = 0.1171, lr_0 = 1.3234e-04
Loss = 1.9593e-03, PNorm = 155.0568, GNorm = 0.1085, lr_0 = 1.3225e-04
Loss = 7.5810e-04, PNorm = 155.0593, GNorm = 0.1383, lr_0 = 1.3216e-04
Loss = 1.0897e-03, PNorm = 155.0599, GNorm = 0.1096, lr_0 = 1.3207e-04
Loss = 2.2176e-03, PNorm = 155.0609, GNorm = 0.1853, lr_0 = 1.3198e-04
Loss = 1.5722e-03, PNorm = 155.0634, GNorm = 0.0456, lr_0 = 1.3189e-04
Loss = 8.8115e-04, PNorm = 155.0663, GNorm = 0.0348, lr_0 = 1.3180e-04
Loss = 7.6622e-04, PNorm = 155.0679, GNorm = 0.0962, lr_0 = 1.3171e-04
Loss = 7.2474e-04, PNorm = 155.0700, GNorm = 0.1216, lr_0 = 1.3162e-04
Loss = 6.8571e-04, PNorm = 155.0707, GNorm = 0.0314, lr_0 = 1.3153e-04
Loss = 1.6043e-03, PNorm = 155.0711, GNorm = 0.1677, lr_0 = 1.3144e-04
Loss = 1.4801e-03, PNorm = 155.0711, GNorm = 0.1640, lr_0 = 1.3135e-04
Loss = 1.3650e-03, PNorm = 155.0730, GNorm = 0.0250, lr_0 = 1.3126e-04
Loss = 1.8941e-03, PNorm = 155.0750, GNorm = 0.1021, lr_0 = 1.3117e-04
Loss = 1.2887e-03, PNorm = 155.0786, GNorm = 0.1148, lr_0 = 1.3108e-04
Loss = 8.2641e-04, PNorm = 155.0798, GNorm = 0.1346, lr_0 = 1.3099e-04
Loss = 1.0704e-03, PNorm = 155.0809, GNorm = 0.1189, lr_0 = 1.3090e-04
Loss = 6.4690e-04, PNorm = 155.0811, GNorm = 0.0625, lr_0 = 1.3081e-04
Loss = 3.9737e-03, PNorm = 155.0829, GNorm = 0.0946, lr_0 = 1.3072e-04
Loss = 8.2974e-04, PNorm = 155.0844, GNorm = 0.0408, lr_0 = 1.3063e-04
Loss = 2.3329e-03, PNorm = 155.0857, GNorm = 0.0793, lr_0 = 1.3054e-04
Loss = 1.9442e-03, PNorm = 155.0883, GNorm = 0.3984, lr_0 = 1.3045e-04
Loss = 6.2111e-04, PNorm = 155.0903, GNorm = 0.0636, lr_0 = 1.3036e-04
Loss = 9.5799e-04, PNorm = 155.0924, GNorm = 0.0971, lr_0 = 1.3027e-04
Loss = 1.8522e-03, PNorm = 155.0935, GNorm = 0.2871, lr_0 = 1.3018e-04
Loss = 7.0857e-04, PNorm = 155.0948, GNorm = 0.0694, lr_0 = 1.3009e-04
Loss = 1.6605e-03, PNorm = 155.0974, GNorm = 0.1199, lr_0 = 1.3000e-04
Loss = 1.7282e-03, PNorm = 155.1004, GNorm = 0.1408, lr_0 = 1.2992e-04
Loss = 3.8294e-03, PNorm = 155.1036, GNorm = 0.1613, lr_0 = 1.2983e-04
Loss = 7.7256e-04, PNorm = 155.1065, GNorm = 0.2023, lr_0 = 1.2974e-04
Loss = 1.5322e-03, PNorm = 155.1087, GNorm = 0.0288, lr_0 = 1.2965e-04
Loss = 9.6909e-04, PNorm = 155.1123, GNorm = 0.0523, lr_0 = 1.2956e-04
Loss = 1.2374e-03, PNorm = 155.1142, GNorm = 0.1798, lr_0 = 1.2947e-04
Loss = 1.4432e-03, PNorm = 155.1181, GNorm = 0.1273, lr_0 = 1.2938e-04
Loss = 2.7275e-03, PNorm = 155.1201, GNorm = 0.0783, lr_0 = 1.2929e-04
Loss = 7.2919e-04, PNorm = 155.1216, GNorm = 0.1015, lr_0 = 1.2921e-04
Loss = 2.2932e-03, PNorm = 155.1217, GNorm = 0.0570, lr_0 = 1.2912e-04
Loss = 1.9305e-03, PNorm = 155.1222, GNorm = 0.1356, lr_0 = 1.2903e-04
Loss = 8.3265e-04, PNorm = 155.1267, GNorm = 0.1021, lr_0 = 1.2894e-04
Loss = 1.5284e-03, PNorm = 155.1296, GNorm = 0.1080, lr_0 = 1.2885e-04
Loss = 7.0329e-04, PNorm = 155.1323, GNorm = 0.0308, lr_0 = 1.2876e-04
Loss = 1.3837e-03, PNorm = 155.1330, GNorm = 0.0556, lr_0 = 1.2867e-04
Loss = 1.8250e-03, PNorm = 155.1346, GNorm = 0.1577, lr_0 = 1.2859e-04
Loss = 9.5952e-04, PNorm = 155.1358, GNorm = 0.0753, lr_0 = 1.2850e-04
Loss = 8.1912e-04, PNorm = 155.1356, GNorm = 0.0383, lr_0 = 1.2841e-04
Loss = 9.5170e-04, PNorm = 155.1366, GNorm = 0.0391, lr_0 = 1.2832e-04
Loss = 6.7645e-04, PNorm = 155.1383, GNorm = 0.0836, lr_0 = 1.2823e-04
Loss = 2.9653e-03, PNorm = 155.1395, GNorm = 0.0457, lr_0 = 1.2815e-04
Loss = 1.3454e-03, PNorm = 155.1402, GNorm = 0.1090, lr_0 = 1.2806e-04
Loss = 1.0830e-03, PNorm = 155.1432, GNorm = 0.1099, lr_0 = 1.2797e-04
Validation mae = 0.475458
Epoch 27
Loss = 1.8971e-03, PNorm = 155.1445, GNorm = 0.1330, lr_0 = 1.2788e-04
Loss = 8.6781e-04, PNorm = 155.1460, GNorm = 0.0496, lr_0 = 1.2780e-04
Loss = 1.5560e-03, PNorm = 155.1476, GNorm = 0.0514, lr_0 = 1.2771e-04
Loss = 9.2358e-04, PNorm = 155.1485, GNorm = 0.0484, lr_0 = 1.2762e-04
Loss = 4.9313e-04, PNorm = 155.1491, GNorm = 0.0454, lr_0 = 1.2753e-04
Loss = 2.1203e-03, PNorm = 155.1496, GNorm = 0.0298, lr_0 = 1.2745e-04
Loss = 1.4669e-03, PNorm = 155.1509, GNorm = 0.0673, lr_0 = 1.2736e-04
Loss = 1.6201e-03, PNorm = 155.1524, GNorm = 0.0984, lr_0 = 1.2727e-04
Loss = 9.0880e-04, PNorm = 155.1537, GNorm = 0.0680, lr_0 = 1.2718e-04
Loss = 1.1962e-03, PNorm = 155.1544, GNorm = 0.0474, lr_0 = 1.2710e-04
Loss = 7.8106e-04, PNorm = 155.1560, GNorm = 0.0794, lr_0 = 1.2701e-04
Loss = 1.0401e-03, PNorm = 155.1563, GNorm = 0.3057, lr_0 = 1.2692e-04
Loss = 7.2930e-04, PNorm = 155.1574, GNorm = 0.0645, lr_0 = 1.2684e-04
Loss = 1.1899e-03, PNorm = 155.1597, GNorm = 0.1346, lr_0 = 1.2675e-04
Loss = 2.1553e-03, PNorm = 155.1621, GNorm = 0.0706, lr_0 = 1.2666e-04
Loss = 1.6614e-03, PNorm = 155.1646, GNorm = 0.0558, lr_0 = 1.2658e-04
Loss = 1.4050e-03, PNorm = 155.1665, GNorm = 0.1621, lr_0 = 1.2649e-04
Loss = 8.4351e-04, PNorm = 155.1686, GNorm = 0.1147, lr_0 = 1.2640e-04
Loss = 9.6987e-04, PNorm = 155.1698, GNorm = 0.0369, lr_0 = 1.2632e-04
Loss = 1.3610e-03, PNorm = 155.1718, GNorm = 0.1264, lr_0 = 1.2623e-04
Loss = 8.8973e-04, PNorm = 155.1715, GNorm = 0.1089, lr_0 = 1.2614e-04
Loss = 1.3859e-03, PNorm = 155.1722, GNorm = 0.0248, lr_0 = 1.2606e-04
Loss = 7.8810e-04, PNorm = 155.1742, GNorm = 0.0607, lr_0 = 1.2597e-04
Loss = 1.0944e-03, PNorm = 155.1745, GNorm = 0.0504, lr_0 = 1.2588e-04
Loss = 1.2836e-03, PNorm = 155.1756, GNorm = 0.0229, lr_0 = 1.2580e-04
Loss = 1.2061e-03, PNorm = 155.1769, GNorm = 0.1911, lr_0 = 1.2571e-04
Loss = 1.4873e-03, PNorm = 155.1783, GNorm = 0.1198, lr_0 = 1.2563e-04
Loss = 5.8850e-04, PNorm = 155.1786, GNorm = 0.1417, lr_0 = 1.2554e-04
Loss = 7.1598e-04, PNorm = 155.1816, GNorm = 0.0295, lr_0 = 1.2545e-04
Loss = 1.3282e-03, PNorm = 155.1839, GNorm = 0.1329, lr_0 = 1.2537e-04
Loss = 3.1952e-03, PNorm = 155.1855, GNorm = 0.0567, lr_0 = 1.2528e-04
Loss = 2.0696e-03, PNorm = 155.1879, GNorm = 0.1006, lr_0 = 1.2520e-04
Loss = 8.9510e-04, PNorm = 155.1885, GNorm = 0.0416, lr_0 = 1.2511e-04
Loss = 2.5858e-03, PNorm = 155.1900, GNorm = 0.4257, lr_0 = 1.2502e-04
Loss = 1.2413e-03, PNorm = 155.1918, GNorm = 0.1061, lr_0 = 1.2494e-04
Loss = 1.0412e-03, PNorm = 155.1941, GNorm = 0.0894, lr_0 = 1.2485e-04
Loss = 6.0499e-04, PNorm = 155.1963, GNorm = 0.0428, lr_0 = 1.2477e-04
Loss = 2.2146e-03, PNorm = 155.1965, GNorm = 0.0785, lr_0 = 1.2468e-04
Loss = 5.8735e-04, PNorm = 155.1970, GNorm = 0.0326, lr_0 = 1.2460e-04
Loss = 7.4962e-04, PNorm = 155.1979, GNorm = 0.0989, lr_0 = 1.2451e-04
Loss = 8.9892e-04, PNorm = 155.1994, GNorm = 0.0696, lr_0 = 1.2443e-04
Loss = 7.2355e-04, PNorm = 155.2013, GNorm = 0.0509, lr_0 = 1.2434e-04
Loss = 1.3303e-03, PNorm = 155.2030, GNorm = 0.0706, lr_0 = 1.2426e-04
Loss = 1.0922e-03, PNorm = 155.2035, GNorm = 0.1937, lr_0 = 1.2417e-04
Loss = 3.1179e-03, PNorm = 155.2055, GNorm = 0.1475, lr_0 = 1.2409e-04
Loss = 8.0697e-04, PNorm = 155.2064, GNorm = 0.1536, lr_0 = 1.2400e-04
Loss = 5.1361e-04, PNorm = 155.2087, GNorm = 0.0867, lr_0 = 1.2392e-04
Loss = 9.5633e-04, PNorm = 155.2097, GNorm = 0.0833, lr_0 = 1.2383e-04
Loss = 6.2689e-04, PNorm = 155.2123, GNorm = 0.0461, lr_0 = 1.2375e-04
Loss = 1.6016e-03, PNorm = 155.2138, GNorm = 0.1216, lr_0 = 1.2366e-04
Loss = 1.7138e-03, PNorm = 155.2147, GNorm = 0.2551, lr_0 = 1.2358e-04
Loss = 7.8159e-04, PNorm = 155.2150, GNorm = 0.1251, lr_0 = 1.2349e-04
Loss = 2.4158e-03, PNorm = 155.2158, GNorm = 0.0446, lr_0 = 1.2341e-04
Loss = 5.6808e-04, PNorm = 155.2157, GNorm = 0.0516, lr_0 = 1.2332e-04
Loss = 2.0034e-03, PNorm = 155.2163, GNorm = 0.0416, lr_0 = 1.2324e-04
Loss = 8.9490e-04, PNorm = 155.2181, GNorm = 0.1692, lr_0 = 1.2315e-04
Loss = 1.1688e-03, PNorm = 155.2205, GNorm = 0.1497, lr_0 = 1.2307e-04
Loss = 6.6243e-04, PNorm = 155.2222, GNorm = 0.0333, lr_0 = 1.2298e-04
Loss = 1.0997e-03, PNorm = 155.2242, GNorm = 0.2380, lr_0 = 1.2290e-04
Loss = 2.0734e-03, PNorm = 155.2251, GNorm = 0.0616, lr_0 = 1.2282e-04
Loss = 1.0857e-03, PNorm = 155.2276, GNorm = 0.0363, lr_0 = 1.2273e-04
Loss = 5.1170e-04, PNorm = 155.2295, GNorm = 0.0721, lr_0 = 1.2265e-04
Loss = 1.2828e-03, PNorm = 155.2306, GNorm = 0.1475, lr_0 = 1.2256e-04
Loss = 9.9309e-04, PNorm = 155.2329, GNorm = 0.0598, lr_0 = 1.2248e-04
Loss = 7.3527e-04, PNorm = 155.2340, GNorm = 0.0583, lr_0 = 1.2240e-04
Loss = 1.3126e-03, PNorm = 155.2365, GNorm = 0.1111, lr_0 = 1.2231e-04
Loss = 6.6001e-04, PNorm = 155.2381, GNorm = 0.0556, lr_0 = 1.2223e-04
Loss = 1.0099e-03, PNorm = 155.2398, GNorm = 0.2050, lr_0 = 1.2214e-04
Loss = 1.1704e-03, PNorm = 155.2408, GNorm = 0.0516, lr_0 = 1.2206e-04
Loss = 2.3537e-03, PNorm = 155.2437, GNorm = 0.1617, lr_0 = 1.2198e-04
Loss = 7.4703e-04, PNorm = 155.2449, GNorm = 0.1428, lr_0 = 1.2189e-04
Loss = 7.6525e-04, PNorm = 155.2454, GNorm = 0.0462, lr_0 = 1.2181e-04
Loss = 9.6702e-04, PNorm = 155.2465, GNorm = 0.0248, lr_0 = 1.2173e-04
Loss = 1.2223e-03, PNorm = 155.2481, GNorm = 0.0225, lr_0 = 1.2164e-04
Loss = 1.1957e-03, PNorm = 155.2485, GNorm = 0.1181, lr_0 = 1.2156e-04
Loss = 1.0468e-03, PNorm = 155.2494, GNorm = 0.0758, lr_0 = 1.2148e-04
Loss = 1.0873e-03, PNorm = 155.2504, GNorm = 0.0602, lr_0 = 1.2139e-04
Loss = 1.5509e-03, PNorm = 155.2507, GNorm = 0.1028, lr_0 = 1.2131e-04
Loss = 7.2960e-04, PNorm = 155.2516, GNorm = 0.1185, lr_0 = 1.2123e-04
Loss = 9.6185e-04, PNorm = 155.2537, GNorm = 0.0655, lr_0 = 1.2114e-04
Loss = 1.4222e-03, PNorm = 155.2561, GNorm = 0.0583, lr_0 = 1.2106e-04
Loss = 1.2159e-03, PNorm = 155.2576, GNorm = 0.0736, lr_0 = 1.2098e-04
Loss = 2.5930e-03, PNorm = 155.2577, GNorm = 0.0776, lr_0 = 1.2090e-04
Loss = 1.1663e-03, PNorm = 155.2580, GNorm = 0.1745, lr_0 = 1.2081e-04
Loss = 1.6573e-03, PNorm = 155.2580, GNorm = 0.1289, lr_0 = 1.2073e-04
Loss = 1.0987e-03, PNorm = 155.2596, GNorm = 0.0549, lr_0 = 1.2065e-04
Loss = 2.9659e-03, PNorm = 155.2607, GNorm = 0.1561, lr_0 = 1.2056e-04
Loss = 8.7979e-04, PNorm = 155.2640, GNorm = 0.0611, lr_0 = 1.2048e-04
Loss = 1.1730e-03, PNorm = 155.2646, GNorm = 0.0565, lr_0 = 1.2040e-04
Loss = 1.3891e-03, PNorm = 155.2662, GNorm = 0.1210, lr_0 = 1.2032e-04
Loss = 2.2481e-03, PNorm = 155.2681, GNorm = 0.3510, lr_0 = 1.2023e-04
Loss = 6.8956e-04, PNorm = 155.2684, GNorm = 0.0320, lr_0 = 1.2015e-04
Loss = 9.8928e-04, PNorm = 155.2714, GNorm = 0.0955, lr_0 = 1.2007e-04
Loss = 1.2981e-03, PNorm = 155.2731, GNorm = 0.1108, lr_0 = 1.1999e-04
Loss = 9.8958e-04, PNorm = 155.2751, GNorm = 0.0309, lr_0 = 1.1991e-04
Loss = 8.2779e-04, PNorm = 155.2765, GNorm = 0.1226, lr_0 = 1.1982e-04
Loss = 2.6497e-03, PNorm = 155.2780, GNorm = 0.0818, lr_0 = 1.1974e-04
Loss = 1.3660e-03, PNorm = 155.2788, GNorm = 0.0390, lr_0 = 1.1966e-04
Loss = 2.1424e-03, PNorm = 155.2803, GNorm = 0.0598, lr_0 = 1.1958e-04
Loss = 8.9997e-04, PNorm = 155.2830, GNorm = 0.0816, lr_0 = 1.1950e-04
Loss = 7.9154e-04, PNorm = 155.2835, GNorm = 0.0603, lr_0 = 1.1941e-04
Loss = 5.3219e-04, PNorm = 155.2863, GNorm = 0.0289, lr_0 = 1.1933e-04
Loss = 2.2792e-03, PNorm = 155.2881, GNorm = 0.1620, lr_0 = 1.1925e-04
Loss = 1.4747e-03, PNorm = 155.2887, GNorm = 0.0446, lr_0 = 1.1917e-04
Loss = 8.9647e-04, PNorm = 155.2908, GNorm = 0.1327, lr_0 = 1.1909e-04
Loss = 1.6489e-03, PNorm = 155.2933, GNorm = 0.2437, lr_0 = 1.1901e-04
Loss = 6.1433e-04, PNorm = 155.2955, GNorm = 0.0367, lr_0 = 1.1892e-04
Loss = 1.3526e-03, PNorm = 155.2956, GNorm = 0.0797, lr_0 = 1.1884e-04
Loss = 1.3954e-03, PNorm = 155.2958, GNorm = 0.1206, lr_0 = 1.1876e-04
Loss = 7.6573e-04, PNorm = 155.2964, GNorm = 0.0648, lr_0 = 1.1868e-04
Loss = 1.3177e-03, PNorm = 155.2971, GNorm = 0.0860, lr_0 = 1.1860e-04
Loss = 1.7056e-03, PNorm = 155.2991, GNorm = 0.0843, lr_0 = 1.1852e-04
Loss = 1.1076e-03, PNorm = 155.3005, GNorm = 0.0447, lr_0 = 1.1844e-04
Loss = 1.8199e-03, PNorm = 155.3015, GNorm = 0.0670, lr_0 = 1.1835e-04
Loss = 1.7500e-03, PNorm = 155.3030, GNorm = 0.1546, lr_0 = 1.1827e-04
Loss = 6.1401e-04, PNorm = 155.3047, GNorm = 0.1003, lr_0 = 1.1819e-04
Loss = 1.6404e-03, PNorm = 155.3064, GNorm = 0.1177, lr_0 = 1.1811e-04
Loss = 8.2444e-04, PNorm = 155.3077, GNorm = 0.1140, lr_0 = 1.1803e-04
Loss = 1.1716e-03, PNorm = 155.3092, GNorm = 0.2451, lr_0 = 1.1795e-04
Loss = 8.5322e-04, PNorm = 155.3118, GNorm = 0.0816, lr_0 = 1.1787e-04
Validation mae = 0.475363
Epoch 28
Loss = 1.6039e-03, PNorm = 155.3134, GNorm = 0.0359, lr_0 = 1.1779e-04
Loss = 7.1681e-04, PNorm = 155.3157, GNorm = 0.0532, lr_0 = 1.1771e-04
Loss = 5.9576e-04, PNorm = 155.3159, GNorm = 0.0936, lr_0 = 1.1763e-04
Loss = 5.2502e-04, PNorm = 155.3162, GNorm = 0.0467, lr_0 = 1.1755e-04
Loss = 2.3703e-03, PNorm = 155.3183, GNorm = 0.2082, lr_0 = 1.1747e-04
Loss = 1.8180e-03, PNorm = 155.3185, GNorm = 0.0448, lr_0 = 1.1739e-04
Loss = 1.0313e-03, PNorm = 155.3212, GNorm = 0.1566, lr_0 = 1.1730e-04
Loss = 6.9564e-04, PNorm = 155.3223, GNorm = 0.0514, lr_0 = 1.1722e-04
Loss = 5.0584e-04, PNorm = 155.3223, GNorm = 0.0607, lr_0 = 1.1714e-04
Loss = 1.3166e-03, PNorm = 155.3233, GNorm = 0.0724, lr_0 = 1.1706e-04
Loss = 5.2223e-04, PNorm = 155.3245, GNorm = 0.0762, lr_0 = 1.1698e-04
Loss = 1.2755e-03, PNorm = 155.3248, GNorm = 0.1153, lr_0 = 1.1690e-04
Loss = 6.7041e-04, PNorm = 155.3243, GNorm = 0.0481, lr_0 = 1.1682e-04
Loss = 8.4297e-04, PNorm = 155.3244, GNorm = 0.0894, lr_0 = 1.1674e-04
Loss = 5.7694e-04, PNorm = 155.3262, GNorm = 0.0763, lr_0 = 1.1666e-04
Loss = 6.2986e-04, PNorm = 155.3281, GNorm = 0.1049, lr_0 = 1.1658e-04
Loss = 3.3747e-03, PNorm = 155.3293, GNorm = 0.0345, lr_0 = 1.1650e-04
Loss = 5.9985e-04, PNorm = 155.3313, GNorm = 0.0785, lr_0 = 1.1642e-04
Loss = 1.0022e-03, PNorm = 155.3309, GNorm = 0.0351, lr_0 = 1.1634e-04
Loss = 1.6361e-03, PNorm = 155.3307, GNorm = 0.1401, lr_0 = 1.1626e-04
Loss = 8.0012e-04, PNorm = 155.3319, GNorm = 0.0904, lr_0 = 1.1618e-04
Loss = 8.9033e-04, PNorm = 155.3336, GNorm = 0.0261, lr_0 = 1.1611e-04
Loss = 5.4977e-04, PNorm = 155.3360, GNorm = 0.0506, lr_0 = 1.1603e-04
Loss = 9.9541e-04, PNorm = 155.3376, GNorm = 0.0698, lr_0 = 1.1595e-04
Loss = 7.6686e-04, PNorm = 155.3400, GNorm = 0.1426, lr_0 = 1.1587e-04
Loss = 7.6771e-04, PNorm = 155.3406, GNorm = 0.0527, lr_0 = 1.1579e-04
Loss = 6.3753e-04, PNorm = 155.3415, GNorm = 0.0335, lr_0 = 1.1571e-04
Loss = 8.7264e-04, PNorm = 155.3438, GNorm = 0.0460, lr_0 = 1.1563e-04
Loss = 4.8793e-04, PNorm = 155.3455, GNorm = 0.0860, lr_0 = 1.1555e-04
Loss = 3.5226e-03, PNorm = 155.3472, GNorm = 0.0989, lr_0 = 1.1547e-04
Loss = 1.0851e-03, PNorm = 155.3490, GNorm = 0.0850, lr_0 = 1.1539e-04
Loss = 6.2058e-04, PNorm = 155.3508, GNorm = 0.0789, lr_0 = 1.1531e-04
Loss = 1.4249e-03, PNorm = 155.3523, GNorm = 0.1071, lr_0 = 1.1523e-04
Loss = 1.0044e-03, PNorm = 155.3523, GNorm = 0.3169, lr_0 = 1.1515e-04
Loss = 5.3132e-04, PNorm = 155.3542, GNorm = 0.0697, lr_0 = 1.1508e-04
Loss = 1.4235e-03, PNorm = 155.3553, GNorm = 0.0384, lr_0 = 1.1500e-04
Loss = 3.2882e-03, PNorm = 155.3558, GNorm = 0.0868, lr_0 = 1.1492e-04
Loss = 1.3500e-03, PNorm = 155.3561, GNorm = 0.0386, lr_0 = 1.1484e-04
Loss = 7.7844e-04, PNorm = 155.3578, GNorm = 0.0832, lr_0 = 1.1476e-04
Loss = 1.8400e-03, PNorm = 155.3604, GNorm = 0.1676, lr_0 = 1.1468e-04
Loss = 2.2175e-03, PNorm = 155.3606, GNorm = 0.0844, lr_0 = 1.1460e-04
Loss = 1.1651e-03, PNorm = 155.3616, GNorm = 0.0239, lr_0 = 1.1452e-04
Loss = 5.1261e-04, PNorm = 155.3626, GNorm = 0.0503, lr_0 = 1.1445e-04
Loss = 1.2337e-03, PNorm = 155.3625, GNorm = 0.0214, lr_0 = 1.1437e-04
Loss = 6.9864e-04, PNorm = 155.3632, GNorm = 0.1702, lr_0 = 1.1429e-04
Loss = 4.6047e-04, PNorm = 155.3653, GNorm = 0.0452, lr_0 = 1.1421e-04
Loss = 8.6741e-04, PNorm = 155.3674, GNorm = 0.0740, lr_0 = 1.1413e-04
Loss = 6.5491e-04, PNorm = 155.3699, GNorm = 0.0261, lr_0 = 1.1405e-04
Loss = 1.4428e-03, PNorm = 155.3711, GNorm = 0.0281, lr_0 = 1.1398e-04
Loss = 9.1704e-04, PNorm = 155.3731, GNorm = 0.0719, lr_0 = 1.1390e-04
Loss = 1.3800e-03, PNorm = 155.3756, GNorm = 0.0510, lr_0 = 1.1382e-04
Loss = 1.5419e-03, PNorm = 155.3762, GNorm = 0.0726, lr_0 = 1.1374e-04
Loss = 1.3921e-03, PNorm = 155.3780, GNorm = 0.0992, lr_0 = 1.1366e-04
Loss = 6.2766e-04, PNorm = 155.3772, GNorm = 0.0697, lr_0 = 1.1359e-04
Loss = 1.8758e-03, PNorm = 155.3774, GNorm = 0.0235, lr_0 = 1.1351e-04
Loss = 7.3205e-04, PNorm = 155.3788, GNorm = 0.0583, lr_0 = 1.1343e-04
Loss = 6.7225e-04, PNorm = 155.3795, GNorm = 0.1047, lr_0 = 1.1335e-04
Loss = 8.8923e-04, PNorm = 155.3804, GNorm = 0.0534, lr_0 = 1.1328e-04
Loss = 7.7277e-04, PNorm = 155.3822, GNorm = 0.0445, lr_0 = 1.1320e-04
Loss = 1.1991e-03, PNorm = 155.3854, GNorm = 0.0665, lr_0 = 1.1312e-04
Loss = 7.1387e-04, PNorm = 155.3874, GNorm = 0.0863, lr_0 = 1.1304e-04
Loss = 8.8757e-04, PNorm = 155.3873, GNorm = 0.1499, lr_0 = 1.1297e-04
Loss = 6.5892e-04, PNorm = 155.3888, GNorm = 0.0868, lr_0 = 1.1289e-04
Loss = 1.2106e-03, PNorm = 155.3887, GNorm = 0.1294, lr_0 = 1.1281e-04
Loss = 3.6228e-03, PNorm = 155.3907, GNorm = 0.0712, lr_0 = 1.1273e-04
Loss = 1.7372e-03, PNorm = 155.3925, GNorm = 0.0442, lr_0 = 1.1266e-04
Loss = 3.7926e-03, PNorm = 155.3945, GNorm = 0.0510, lr_0 = 1.1258e-04
Loss = 1.3876e-03, PNorm = 155.3982, GNorm = 0.0724, lr_0 = 1.1250e-04
Loss = 5.8947e-04, PNorm = 155.3998, GNorm = 0.0885, lr_0 = 1.1243e-04
Loss = 1.0024e-03, PNorm = 155.4013, GNorm = 0.0538, lr_0 = 1.1235e-04
Loss = 5.7297e-04, PNorm = 155.4027, GNorm = 0.0952, lr_0 = 1.1227e-04
Loss = 4.8723e-04, PNorm = 155.4055, GNorm = 0.0256, lr_0 = 1.1219e-04
Loss = 7.7965e-04, PNorm = 155.4075, GNorm = 0.0976, lr_0 = 1.1212e-04
Loss = 1.7676e-03, PNorm = 155.4085, GNorm = 0.1277, lr_0 = 1.1204e-04
Loss = 1.7431e-03, PNorm = 155.4095, GNorm = 0.0983, lr_0 = 1.1196e-04
Loss = 4.2479e-04, PNorm = 155.4109, GNorm = 0.0635, lr_0 = 1.1189e-04
Loss = 7.3814e-04, PNorm = 155.4125, GNorm = 0.0546, lr_0 = 1.1181e-04
Loss = 9.3321e-04, PNorm = 155.4113, GNorm = 0.0683, lr_0 = 1.1173e-04
Loss = 2.6037e-03, PNorm = 155.4122, GNorm = 0.2826, lr_0 = 1.1166e-04
Loss = 1.8254e-03, PNorm = 155.4121, GNorm = 0.0511, lr_0 = 1.1158e-04
Loss = 5.1002e-04, PNorm = 155.4135, GNorm = 0.0539, lr_0 = 1.1150e-04
Loss = 2.1571e-03, PNorm = 155.4139, GNorm = 0.1154, lr_0 = 1.1143e-04
Loss = 8.9057e-04, PNorm = 155.4150, GNorm = 0.0432, lr_0 = 1.1135e-04
Loss = 1.1616e-03, PNorm = 155.4170, GNorm = 0.0859, lr_0 = 1.1128e-04
Loss = 9.2615e-04, PNorm = 155.4178, GNorm = 0.0453, lr_0 = 1.1120e-04
Loss = 2.3518e-03, PNorm = 155.4194, GNorm = 0.2642, lr_0 = 1.1112e-04
Loss = 2.3325e-03, PNorm = 155.4195, GNorm = 0.0518, lr_0 = 1.1105e-04
Loss = 1.4515e-03, PNorm = 155.4216, GNorm = 0.1059, lr_0 = 1.1097e-04
Loss = 1.1503e-03, PNorm = 155.4216, GNorm = 0.1074, lr_0 = 1.1089e-04
Loss = 5.2158e-04, PNorm = 155.4221, GNorm = 0.1379, lr_0 = 1.1082e-04
Loss = 1.7597e-03, PNorm = 155.4242, GNorm = 0.1986, lr_0 = 1.1074e-04
Loss = 2.8346e-03, PNorm = 155.4254, GNorm = 0.3650, lr_0 = 1.1067e-04
Loss = 1.0914e-03, PNorm = 155.4274, GNorm = 0.0736, lr_0 = 1.1059e-04
Loss = 1.4536e-03, PNorm = 155.4288, GNorm = 0.1313, lr_0 = 1.1052e-04
Loss = 1.3376e-03, PNorm = 155.4298, GNorm = 0.3544, lr_0 = 1.1044e-04
Loss = 1.0005e-03, PNorm = 155.4323, GNorm = 0.0717, lr_0 = 1.1036e-04
Loss = 5.4683e-04, PNorm = 155.4334, GNorm = 0.1657, lr_0 = 1.1029e-04
Loss = 5.9795e-04, PNorm = 155.4361, GNorm = 0.1231, lr_0 = 1.1021e-04
Loss = 9.0355e-04, PNorm = 155.4366, GNorm = 0.1048, lr_0 = 1.1014e-04
Loss = 1.9298e-03, PNorm = 155.4368, GNorm = 0.1682, lr_0 = 1.1006e-04
Loss = 5.6350e-04, PNorm = 155.4386, GNorm = 0.0753, lr_0 = 1.0999e-04
Loss = 1.4628e-03, PNorm = 155.4393, GNorm = 0.1125, lr_0 = 1.0991e-04
Loss = 5.4220e-04, PNorm = 155.4394, GNorm = 0.0266, lr_0 = 1.0984e-04
Loss = 1.0404e-03, PNorm = 155.4400, GNorm = 0.2753, lr_0 = 1.0976e-04
Loss = 5.8648e-04, PNorm = 155.4422, GNorm = 0.1040, lr_0 = 1.0969e-04
Loss = 2.1144e-03, PNorm = 155.4437, GNorm = 0.0203, lr_0 = 1.0961e-04
Loss = 5.2944e-04, PNorm = 155.4451, GNorm = 0.0350, lr_0 = 1.0954e-04
Loss = 2.3170e-03, PNorm = 155.4459, GNorm = 0.0529, lr_0 = 1.0946e-04
Loss = 8.3279e-04, PNorm = 155.4478, GNorm = 0.0920, lr_0 = 1.0939e-04
Loss = 1.0988e-03, PNorm = 155.4486, GNorm = 0.0316, lr_0 = 1.0931e-04
Loss = 1.0162e-03, PNorm = 155.4489, GNorm = 0.0936, lr_0 = 1.0924e-04
Loss = 7.8029e-04, PNorm = 155.4513, GNorm = 0.1040, lr_0 = 1.0916e-04
Loss = 4.8816e-04, PNorm = 155.4529, GNorm = 0.0617, lr_0 = 1.0909e-04
Loss = 1.3648e-03, PNorm = 155.4534, GNorm = 0.0681, lr_0 = 1.0901e-04
Loss = 9.1099e-04, PNorm = 155.4546, GNorm = 0.0660, lr_0 = 1.0894e-04
Loss = 6.1249e-04, PNorm = 155.4554, GNorm = 0.0380, lr_0 = 1.0886e-04
Loss = 1.0331e-03, PNorm = 155.4566, GNorm = 0.1432, lr_0 = 1.0879e-04
Loss = 5.6618e-04, PNorm = 155.4587, GNorm = 0.0649, lr_0 = 1.0871e-04
Loss = 1.6943e-03, PNorm = 155.4610, GNorm = 0.0331, lr_0 = 1.0864e-04
Loss = 1.2889e-03, PNorm = 155.4628, GNorm = 0.0293, lr_0 = 1.0856e-04
Validation mae = 0.475005
Epoch 29
Loss = 4.0405e-03, PNorm = 155.4636, GNorm = 0.1588, lr_0 = 1.0849e-04
Loss = 7.2202e-04, PNorm = 155.4643, GNorm = 0.0460, lr_0 = 1.0841e-04
Loss = 2.3707e-03, PNorm = 155.4648, GNorm = 0.3980, lr_0 = 1.0834e-04
Loss = 5.2383e-04, PNorm = 155.4643, GNorm = 0.0412, lr_0 = 1.0827e-04
Loss = 6.2443e-04, PNorm = 155.4659, GNorm = 0.0737, lr_0 = 1.0819e-04
Loss = 5.1656e-04, PNorm = 155.4673, GNorm = 0.1224, lr_0 = 1.0812e-04
Loss = 4.2647e-04, PNorm = 155.4680, GNorm = 0.0613, lr_0 = 1.0804e-04
Loss = 8.9983e-04, PNorm = 155.4672, GNorm = 0.0491, lr_0 = 1.0797e-04
Loss = 7.7736e-04, PNorm = 155.4680, GNorm = 0.0635, lr_0 = 1.0790e-04
Loss = 5.0654e-04, PNorm = 155.4678, GNorm = 0.0341, lr_0 = 1.0782e-04
Loss = 1.0405e-03, PNorm = 155.4675, GNorm = 0.0322, lr_0 = 1.0775e-04
Loss = 8.6822e-04, PNorm = 155.4675, GNorm = 0.1126, lr_0 = 1.0767e-04
Loss = 4.8273e-04, PNorm = 155.4683, GNorm = 0.0386, lr_0 = 1.0760e-04
Loss = 8.7682e-04, PNorm = 155.4700, GNorm = 0.1232, lr_0 = 1.0753e-04
Loss = 7.9967e-04, PNorm = 155.4729, GNorm = 0.0358, lr_0 = 1.0745e-04
Loss = 1.6496e-03, PNorm = 155.4744, GNorm = 0.0163, lr_0 = 1.0738e-04
Loss = 4.6790e-04, PNorm = 155.4759, GNorm = 0.0336, lr_0 = 1.0731e-04
Loss = 4.5921e-04, PNorm = 155.4768, GNorm = 0.0617, lr_0 = 1.0723e-04
Loss = 7.0911e-04, PNorm = 155.4776, GNorm = 0.0723, lr_0 = 1.0716e-04
Loss = 6.0548e-04, PNorm = 155.4776, GNorm = 0.1436, lr_0 = 1.0709e-04
Loss = 8.6833e-04, PNorm = 155.4790, GNorm = 0.0476, lr_0 = 1.0701e-04
Loss = 7.2506e-04, PNorm = 155.4793, GNorm = 0.0916, lr_0 = 1.0694e-04
Loss = 7.6305e-04, PNorm = 155.4794, GNorm = 0.0885, lr_0 = 1.0687e-04
Loss = 6.1683e-04, PNorm = 155.4814, GNorm = 0.1329, lr_0 = 1.0679e-04
Loss = 6.4260e-04, PNorm = 155.4839, GNorm = 0.0799, lr_0 = 1.0672e-04
Loss = 9.6374e-04, PNorm = 155.4847, GNorm = 0.0531, lr_0 = 1.0665e-04
Loss = 5.3459e-04, PNorm = 155.4858, GNorm = 0.0364, lr_0 = 1.0657e-04
Loss = 1.6396e-03, PNorm = 155.4871, GNorm = 0.2090, lr_0 = 1.0650e-04
Loss = 1.2028e-03, PNorm = 155.4875, GNorm = 0.1068, lr_0 = 1.0643e-04
Loss = 4.3549e-04, PNorm = 155.4879, GNorm = 0.0647, lr_0 = 1.0635e-04
Loss = 4.3776e-04, PNorm = 155.4884, GNorm = 0.0363, lr_0 = 1.0628e-04
Loss = 4.0668e-04, PNorm = 155.4899, GNorm = 0.0561, lr_0 = 1.0621e-04
Loss = 8.5666e-04, PNorm = 155.4910, GNorm = 0.0301, lr_0 = 1.0614e-04
Loss = 4.6254e-04, PNorm = 155.4924, GNorm = 0.0515, lr_0 = 1.0606e-04
Loss = 4.7464e-04, PNorm = 155.4931, GNorm = 0.0414, lr_0 = 1.0599e-04
Loss = 1.2769e-03, PNorm = 155.4941, GNorm = 0.0401, lr_0 = 1.0592e-04
Loss = 5.1397e-04, PNorm = 155.4946, GNorm = 0.0853, lr_0 = 1.0585e-04
Loss = 8.2563e-04, PNorm = 155.4947, GNorm = 0.0778, lr_0 = 1.0577e-04
Loss = 8.2454e-04, PNorm = 155.4965, GNorm = 0.0920, lr_0 = 1.0570e-04
Loss = 1.4549e-03, PNorm = 155.4988, GNorm = 0.0799, lr_0 = 1.0563e-04
Loss = 6.1531e-04, PNorm = 155.4998, GNorm = 0.0670, lr_0 = 1.0556e-04
Loss = 4.1989e-04, PNorm = 155.5005, GNorm = 0.0309, lr_0 = 1.0548e-04
Loss = 1.4560e-03, PNorm = 155.5006, GNorm = 0.0629, lr_0 = 1.0541e-04
Loss = 4.4107e-04, PNorm = 155.5003, GNorm = 0.0526, lr_0 = 1.0534e-04
Loss = 1.2106e-03, PNorm = 155.5006, GNorm = 0.0365, lr_0 = 1.0527e-04
Loss = 4.4367e-04, PNorm = 155.5022, GNorm = 0.1634, lr_0 = 1.0519e-04
Loss = 1.5312e-03, PNorm = 155.5038, GNorm = 0.0887, lr_0 = 1.0512e-04
Loss = 4.9517e-04, PNorm = 155.5042, GNorm = 0.0243, lr_0 = 1.0505e-04
Loss = 1.3092e-03, PNorm = 155.5065, GNorm = 0.1140, lr_0 = 1.0498e-04
Loss = 1.3276e-03, PNorm = 155.5083, GNorm = 0.0776, lr_0 = 1.0491e-04
Loss = 1.6483e-03, PNorm = 155.5101, GNorm = 0.0794, lr_0 = 1.0483e-04
Loss = 1.9537e-03, PNorm = 155.5106, GNorm = 0.0327, lr_0 = 1.0476e-04
Loss = 1.1270e-03, PNorm = 155.5112, GNorm = 0.0560, lr_0 = 1.0469e-04
Loss = 4.1891e-04, PNorm = 155.5120, GNorm = 0.0871, lr_0 = 1.0462e-04
Loss = 8.8581e-04, PNorm = 155.5114, GNorm = 0.0720, lr_0 = 1.0455e-04
Loss = 9.0251e-04, PNorm = 155.5117, GNorm = 0.0675, lr_0 = 1.0448e-04
Loss = 2.4804e-03, PNorm = 155.5117, GNorm = 0.0976, lr_0 = 1.0440e-04
Loss = 1.4632e-03, PNorm = 155.5138, GNorm = 0.0984, lr_0 = 1.0433e-04
Loss = 1.3285e-03, PNorm = 155.5148, GNorm = 0.0215, lr_0 = 1.0426e-04
Loss = 7.8810e-04, PNorm = 155.5147, GNorm = 0.0270, lr_0 = 1.0419e-04
Loss = 6.0385e-04, PNorm = 155.5164, GNorm = 0.0982, lr_0 = 1.0412e-04
Loss = 7.4090e-04, PNorm = 155.5179, GNorm = 0.0506, lr_0 = 1.0405e-04
Loss = 7.9541e-04, PNorm = 155.5186, GNorm = 0.1081, lr_0 = 1.0398e-04
Loss = 4.7243e-04, PNorm = 155.5192, GNorm = 0.0306, lr_0 = 1.0391e-04
Loss = 9.6562e-04, PNorm = 155.5206, GNorm = 0.1615, lr_0 = 1.0383e-04
Loss = 1.7045e-03, PNorm = 155.5223, GNorm = 0.2068, lr_0 = 1.0376e-04
Loss = 1.8269e-03, PNorm = 155.5234, GNorm = 0.0635, lr_0 = 1.0369e-04
Loss = 1.0516e-03, PNorm = 155.5241, GNorm = 0.0628, lr_0 = 1.0362e-04
Loss = 7.9478e-04, PNorm = 155.5272, GNorm = 0.1745, lr_0 = 1.0355e-04
Loss = 1.4513e-03, PNorm = 155.5288, GNorm = 0.0445, lr_0 = 1.0348e-04
Loss = 1.3937e-03, PNorm = 155.5291, GNorm = 0.1715, lr_0 = 1.0341e-04
Loss = 8.3141e-04, PNorm = 155.5301, GNorm = 0.0354, lr_0 = 1.0334e-04
Loss = 6.1859e-04, PNorm = 155.5317, GNorm = 0.0442, lr_0 = 1.0327e-04
Loss = 1.3626e-03, PNorm = 155.5342, GNorm = 0.2638, lr_0 = 1.0320e-04
Loss = 3.2070e-03, PNorm = 155.5361, GNorm = 0.1287, lr_0 = 1.0312e-04
Loss = 5.4140e-04, PNorm = 155.5361, GNorm = 0.0669, lr_0 = 1.0305e-04
Loss = 4.4403e-04, PNorm = 155.5374, GNorm = 0.1193, lr_0 = 1.0298e-04
Loss = 4.3785e-04, PNorm = 155.5365, GNorm = 0.1949, lr_0 = 1.0291e-04
Loss = 4.5330e-04, PNorm = 155.5379, GNorm = 0.0732, lr_0 = 1.0284e-04
Loss = 7.0122e-04, PNorm = 155.5388, GNorm = 0.0880, lr_0 = 1.0277e-04
Loss = 4.0092e-03, PNorm = 155.5400, GNorm = 0.1141, lr_0 = 1.0270e-04
Loss = 1.0888e-03, PNorm = 155.5435, GNorm = 0.1844, lr_0 = 1.0263e-04
Loss = 1.2513e-03, PNorm = 155.5445, GNorm = 0.1305, lr_0 = 1.0256e-04
Loss = 4.4094e-04, PNorm = 155.5447, GNorm = 0.0424, lr_0 = 1.0249e-04
Loss = 8.5996e-04, PNorm = 155.5454, GNorm = 0.0449, lr_0 = 1.0242e-04
Loss = 1.1990e-03, PNorm = 155.5454, GNorm = 0.1137, lr_0 = 1.0235e-04
Loss = 4.1365e-04, PNorm = 155.5462, GNorm = 0.0631, lr_0 = 1.0228e-04
Loss = 1.7634e-03, PNorm = 155.5470, GNorm = 0.0359, lr_0 = 1.0221e-04
Loss = 1.0565e-03, PNorm = 155.5486, GNorm = 0.1083, lr_0 = 1.0214e-04
Loss = 2.5742e-03, PNorm = 155.5494, GNorm = 0.0595, lr_0 = 1.0207e-04
Loss = 2.2404e-03, PNorm = 155.5540, GNorm = 0.0773, lr_0 = 1.0200e-04
Loss = 1.3980e-03, PNorm = 155.5575, GNorm = 0.1949, lr_0 = 1.0193e-04
Loss = 7.4937e-04, PNorm = 155.5593, GNorm = 0.0374, lr_0 = 1.0186e-04
Loss = 4.4724e-04, PNorm = 155.5601, GNorm = 0.0496, lr_0 = 1.0179e-04
Loss = 1.1729e-03, PNorm = 155.5612, GNorm = 0.0191, lr_0 = 1.0172e-04
Loss = 1.2055e-03, PNorm = 155.5619, GNorm = 0.1519, lr_0 = 1.0165e-04
Loss = 8.4051e-04, PNorm = 155.5632, GNorm = 0.0571, lr_0 = 1.0158e-04
Loss = 5.3986e-04, PNorm = 155.5634, GNorm = 0.1066, lr_0 = 1.0151e-04
Loss = 4.5234e-03, PNorm = 155.5640, GNorm = 0.0557, lr_0 = 1.0144e-04
Loss = 6.9744e-04, PNorm = 155.5653, GNorm = 0.0676, lr_0 = 1.0137e-04
Loss = 6.8812e-04, PNorm = 155.5678, GNorm = 0.0566, lr_0 = 1.0130e-04
Loss = 2.5018e-03, PNorm = 155.5698, GNorm = 0.0779, lr_0 = 1.0123e-04
Loss = 1.3100e-03, PNorm = 155.5695, GNorm = 0.1768, lr_0 = 1.0116e-04
Loss = 7.3619e-04, PNorm = 155.5690, GNorm = 0.1141, lr_0 = 1.0110e-04
Loss = 1.3306e-03, PNorm = 155.5700, GNorm = 0.0297, lr_0 = 1.0103e-04
Loss = 1.9434e-03, PNorm = 155.5710, GNorm = 0.1067, lr_0 = 1.0096e-04
Loss = 1.0782e-03, PNorm = 155.5711, GNorm = 0.0546, lr_0 = 1.0089e-04
Loss = 3.0095e-03, PNorm = 155.5715, GNorm = 0.0820, lr_0 = 1.0082e-04
Loss = 1.1683e-03, PNorm = 155.5730, GNorm = 0.1043, lr_0 = 1.0075e-04
Loss = 2.0688e-03, PNorm = 155.5749, GNorm = 0.1258, lr_0 = 1.0068e-04
Loss = 4.6992e-04, PNorm = 155.5773, GNorm = 0.1055, lr_0 = 1.0061e-04
Loss = 1.1429e-03, PNorm = 155.5777, GNorm = 0.0934, lr_0 = 1.0054e-04
Loss = 1.5975e-03, PNorm = 155.5788, GNorm = 0.0885, lr_0 = 1.0047e-04
Loss = 7.3522e-04, PNorm = 155.5795, GNorm = 0.0352, lr_0 = 1.0041e-04
Loss = 3.7558e-04, PNorm = 155.5828, GNorm = 0.0281, lr_0 = 1.0034e-04
Loss = 1.6028e-03, PNorm = 155.5841, GNorm = 0.0534, lr_0 = 1.0027e-04
Loss = 4.3122e-04, PNorm = 155.5854, GNorm = 0.0365, lr_0 = 1.0020e-04
Loss = 8.8273e-04, PNorm = 155.5858, GNorm = 0.0700, lr_0 = 1.0013e-04
Loss = 4.6245e-04, PNorm = 155.5867, GNorm = 0.0811, lr_0 = 1.0006e-04
Loss = 1.5207e-03, PNorm = 155.5882, GNorm = 0.1056, lr_0 = 1.0000e-04
Validation mae = 0.475093
Model 0 best validation mae = 0.475005 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.456409
Ensemble test mae = 0.456409
Fold 7
Splitting data with seed 7
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 8.9208e-01, PNorm = 62.2445, GNorm = 1.9857, lr_0 = 1.0413e-04
Loss = 7.4969e-01, PNorm = 62.2570, GNorm = 1.8757, lr_0 = 1.0788e-04
Loss = 5.9238e-01, PNorm = 62.2705, GNorm = 2.4767, lr_0 = 1.1163e-04
Loss = 4.8202e-01, PNorm = 62.2817, GNorm = 2.0786, lr_0 = 1.1537e-04
Loss = 4.4526e-01, PNorm = 62.2913, GNorm = 1.9813, lr_0 = 1.1913e-04
Loss = 4.3063e-01, PNorm = 62.3009, GNorm = 1.9445, lr_0 = 1.2287e-04
Loss = 4.2892e-01, PNorm = 62.3092, GNorm = 2.2171, lr_0 = 1.2663e-04
Loss = 3.6213e-01, PNorm = 62.3180, GNorm = 1.8164, lr_0 = 1.3038e-04
Loss = 3.9589e-01, PNorm = 62.3261, GNorm = 4.1366, lr_0 = 1.3413e-04
Loss = 3.6417e-01, PNorm = 62.3343, GNorm = 4.4856, lr_0 = 1.3788e-04
Loss = 3.9587e-01, PNorm = 62.3438, GNorm = 3.1481, lr_0 = 1.4163e-04
Loss = 3.2545e-01, PNorm = 62.3543, GNorm = 2.0244, lr_0 = 1.4537e-04
Loss = 3.4006e-01, PNorm = 62.3634, GNorm = 1.5132, lr_0 = 1.4913e-04
Loss = 3.5404e-01, PNorm = 62.3734, GNorm = 1.8864, lr_0 = 1.5288e-04
Loss = 3.5112e-01, PNorm = 62.3831, GNorm = 3.4969, lr_0 = 1.5662e-04
Loss = 3.4029e-01, PNorm = 62.3921, GNorm = 4.2043, lr_0 = 1.6038e-04
Loss = 3.0584e-01, PNorm = 62.4031, GNorm = 1.5624, lr_0 = 1.6412e-04
Loss = 3.1090e-01, PNorm = 62.4145, GNorm = 2.7992, lr_0 = 1.6788e-04
Loss = 3.2109e-01, PNorm = 62.4253, GNorm = 1.6374, lr_0 = 1.7163e-04
Loss = 3.1557e-01, PNorm = 62.4365, GNorm = 2.0127, lr_0 = 1.7538e-04
Loss = 3.1918e-01, PNorm = 62.4473, GNorm = 2.3098, lr_0 = 1.7913e-04
Loss = 3.1501e-01, PNorm = 62.4583, GNorm = 2.1289, lr_0 = 1.8288e-04
Loss = 2.8861e-01, PNorm = 62.4729, GNorm = 1.6007, lr_0 = 1.8662e-04
Loss = 3.0128e-01, PNorm = 62.4875, GNorm = 1.8396, lr_0 = 1.9038e-04
Loss = 3.1051e-01, PNorm = 62.5011, GNorm = 1.5158, lr_0 = 1.9413e-04
Loss = 2.9527e-01, PNorm = 62.5123, GNorm = 2.0253, lr_0 = 1.9788e-04
Loss = 2.8838e-01, PNorm = 62.5277, GNorm = 1.6339, lr_0 = 2.0163e-04
Loss = 2.5000e-01, PNorm = 62.5415, GNorm = 1.8843, lr_0 = 2.0537e-04
Loss = 2.4220e-01, PNorm = 62.5557, GNorm = 1.3243, lr_0 = 2.0913e-04
Loss = 2.5431e-01, PNorm = 62.5696, GNorm = 1.2419, lr_0 = 2.1288e-04
Loss = 2.9669e-01, PNorm = 62.5832, GNorm = 1.4092, lr_0 = 2.1663e-04
Loss = 2.7469e-01, PNorm = 62.6003, GNorm = 1.3398, lr_0 = 2.2038e-04
Loss = 2.7213e-01, PNorm = 62.6164, GNorm = 2.0497, lr_0 = 2.2412e-04
Loss = 2.7157e-01, PNorm = 62.6296, GNorm = 1.6437, lr_0 = 2.2787e-04
Loss = 2.7476e-01, PNorm = 62.6456, GNorm = 1.7474, lr_0 = 2.3163e-04
Loss = 2.7730e-01, PNorm = 62.6648, GNorm = 1.2585, lr_0 = 2.3538e-04
Loss = 2.8712e-01, PNorm = 62.6812, GNorm = 1.2900, lr_0 = 2.3913e-04
Loss = 2.7662e-01, PNorm = 62.7014, GNorm = 1.2356, lr_0 = 2.4288e-04
Loss = 2.8535e-01, PNorm = 62.7161, GNorm = 1.7693, lr_0 = 2.4662e-04
Loss = 2.5637e-01, PNorm = 62.7358, GNorm = 1.5482, lr_0 = 2.5038e-04
Loss = 2.5191e-01, PNorm = 62.7557, GNorm = 1.3945, lr_0 = 2.5413e-04
Loss = 2.5880e-01, PNorm = 62.7738, GNorm = 1.2280, lr_0 = 2.5788e-04
Loss = 2.6055e-01, PNorm = 62.7925, GNorm = 1.2701, lr_0 = 2.6163e-04
Loss = 2.5438e-01, PNorm = 62.8141, GNorm = 1.1024, lr_0 = 2.6537e-04
Loss = 2.4769e-01, PNorm = 62.8365, GNorm = 1.5617, lr_0 = 2.6912e-04
Loss = 2.6578e-01, PNorm = 62.8557, GNorm = 1.1207, lr_0 = 2.7288e-04
Loss = 2.7725e-01, PNorm = 62.8786, GNorm = 1.2294, lr_0 = 2.7663e-04
Loss = 2.9860e-01, PNorm = 62.9040, GNorm = 2.5588, lr_0 = 2.8038e-04
Loss = 2.5830e-01, PNorm = 62.9280, GNorm = 2.4108, lr_0 = 2.8413e-04
Loss = 3.0738e-01, PNorm = 62.9495, GNorm = 2.2737, lr_0 = 2.8787e-04
Loss = 3.0518e-01, PNorm = 62.9834, GNorm = 2.1869, lr_0 = 2.9163e-04
Loss = 2.9018e-01, PNorm = 63.0075, GNorm = 1.5658, lr_0 = 2.9538e-04
Loss = 3.0779e-01, PNorm = 63.0334, GNorm = 1.9010, lr_0 = 2.9913e-04
Loss = 3.1290e-01, PNorm = 63.0651, GNorm = 1.4569, lr_0 = 3.0288e-04
Loss = 2.5930e-01, PNorm = 63.0898, GNorm = 1.3665, lr_0 = 3.0662e-04
Loss = 2.4300e-01, PNorm = 63.1157, GNorm = 1.0590, lr_0 = 3.1037e-04
Loss = 2.3721e-01, PNorm = 63.1453, GNorm = 1.6453, lr_0 = 3.1413e-04
Loss = 2.6785e-01, PNorm = 63.1718, GNorm = 1.6278, lr_0 = 3.1788e-04
Loss = 2.3165e-01, PNorm = 63.1974, GNorm = 1.1104, lr_0 = 3.2163e-04
Loss = 2.3487e-01, PNorm = 63.2239, GNorm = 1.0300, lr_0 = 3.2538e-04
Loss = 2.2942e-01, PNorm = 63.2486, GNorm = 1.1823, lr_0 = 3.2912e-04
Loss = 2.2852e-01, PNorm = 63.2767, GNorm = 1.4510, lr_0 = 3.3288e-04
Loss = 2.5534e-01, PNorm = 63.3016, GNorm = 1.4139, lr_0 = 3.3663e-04
Loss = 2.7009e-01, PNorm = 63.3267, GNorm = 1.4589, lr_0 = 3.4038e-04
Loss = 2.4807e-01, PNorm = 63.3574, GNorm = 1.2188, lr_0 = 3.4413e-04
Loss = 2.2791e-01, PNorm = 63.3836, GNorm = 1.2551, lr_0 = 3.4787e-04
Loss = 2.3638e-01, PNorm = 63.4132, GNorm = 2.0847, lr_0 = 3.5162e-04
Loss = 2.4832e-01, PNorm = 63.4400, GNorm = 1.0454, lr_0 = 3.5538e-04
Loss = 2.6335e-01, PNorm = 63.4735, GNorm = 1.1801, lr_0 = 3.5913e-04
Loss = 2.4419e-01, PNorm = 63.5010, GNorm = 1.1664, lr_0 = 3.6288e-04
Loss = 2.3430e-01, PNorm = 63.5337, GNorm = 0.9998, lr_0 = 3.6662e-04
Loss = 2.2896e-01, PNorm = 63.5654, GNorm = 1.0505, lr_0 = 3.7037e-04
Loss = 2.1947e-01, PNorm = 63.5945, GNorm = 1.3871, lr_0 = 3.7413e-04
Loss = 2.4007e-01, PNorm = 63.6234, GNorm = 0.8447, lr_0 = 3.7788e-04
Loss = 2.5322e-01, PNorm = 63.6554, GNorm = 1.2838, lr_0 = 3.8163e-04
Loss = 2.6699e-01, PNorm = 63.6899, GNorm = 1.1917, lr_0 = 3.8537e-04
Loss = 2.6077e-01, PNorm = 63.7264, GNorm = 1.0592, lr_0 = 3.8912e-04
Loss = 2.2596e-01, PNorm = 63.7669, GNorm = 1.1962, lr_0 = 3.9287e-04
Loss = 2.4274e-01, PNorm = 63.7965, GNorm = 0.9197, lr_0 = 3.9663e-04
Loss = 2.5813e-01, PNorm = 63.8338, GNorm = 1.2049, lr_0 = 4.0038e-04
Loss = 2.5883e-01, PNorm = 63.8729, GNorm = 1.1947, lr_0 = 4.0413e-04
Loss = 2.5700e-01, PNorm = 63.9024, GNorm = 1.5295, lr_0 = 4.0787e-04
Loss = 2.6529e-01, PNorm = 63.9412, GNorm = 1.1444, lr_0 = 4.1162e-04
Loss = 2.3264e-01, PNorm = 63.9791, GNorm = 0.9001, lr_0 = 4.1537e-04
Loss = 2.0057e-01, PNorm = 64.0175, GNorm = 0.8372, lr_0 = 4.1913e-04
Loss = 1.9875e-01, PNorm = 64.0526, GNorm = 1.3744, lr_0 = 4.2288e-04
Loss = 2.2292e-01, PNorm = 64.0870, GNorm = 1.4333, lr_0 = 4.2662e-04
Loss = 2.5009e-01, PNorm = 64.1230, GNorm = 1.1662, lr_0 = 4.3037e-04
Loss = 1.9899e-01, PNorm = 64.1612, GNorm = 0.9100, lr_0 = 4.3412e-04
Loss = 2.1652e-01, PNorm = 64.2047, GNorm = 1.2149, lr_0 = 4.3788e-04
Loss = 2.2170e-01, PNorm = 64.2412, GNorm = 1.4439, lr_0 = 4.4163e-04
Loss = 2.2205e-01, PNorm = 64.2884, GNorm = 1.1996, lr_0 = 4.4538e-04
Loss = 2.5460e-01, PNorm = 64.3277, GNorm = 1.5335, lr_0 = 4.4912e-04
Loss = 2.3933e-01, PNorm = 64.3743, GNorm = 1.0696, lr_0 = 4.5287e-04
Loss = 2.4283e-01, PNorm = 64.4167, GNorm = 1.5380, lr_0 = 4.5662e-04
Loss = 2.1556e-01, PNorm = 64.4615, GNorm = 1.0702, lr_0 = 4.6038e-04
Loss = 2.2241e-01, PNorm = 64.5086, GNorm = 1.0344, lr_0 = 4.6413e-04
Loss = 2.2431e-01, PNorm = 64.5498, GNorm = 1.0553, lr_0 = 4.6787e-04
Loss = 2.2236e-01, PNorm = 64.5966, GNorm = 0.9223, lr_0 = 4.7162e-04
Loss = 2.5392e-01, PNorm = 64.6469, GNorm = 1.0038, lr_0 = 4.7537e-04
Loss = 2.2884e-01, PNorm = 64.6913, GNorm = 1.0932, lr_0 = 4.7913e-04
Loss = 2.3329e-01, PNorm = 64.7393, GNorm = 1.3533, lr_0 = 4.8288e-04
Loss = 2.0844e-01, PNorm = 64.7847, GNorm = 1.1521, lr_0 = 4.8663e-04
Loss = 2.3221e-01, PNorm = 64.8343, GNorm = 0.9979, lr_0 = 4.9038e-04
Loss = 2.1480e-01, PNorm = 64.8795, GNorm = 0.9853, lr_0 = 4.9412e-04
Loss = 2.2896e-01, PNorm = 64.9281, GNorm = 0.8996, lr_0 = 4.9788e-04
Loss = 2.4544e-01, PNorm = 64.9747, GNorm = 0.9209, lr_0 = 5.0163e-04
Loss = 2.0631e-01, PNorm = 65.0218, GNorm = 1.0900, lr_0 = 5.0538e-04
Loss = 2.1784e-01, PNorm = 65.0667, GNorm = 0.8726, lr_0 = 5.0913e-04
Loss = 2.4614e-01, PNorm = 65.1137, GNorm = 1.0392, lr_0 = 5.1287e-04
Loss = 1.9119e-01, PNorm = 65.1656, GNorm = 0.9894, lr_0 = 5.1663e-04
Loss = 2.1055e-01, PNorm = 65.2094, GNorm = 0.8024, lr_0 = 5.2038e-04
Loss = 2.3319e-01, PNorm = 65.2574, GNorm = 1.1862, lr_0 = 5.2413e-04
Loss = 2.2947e-01, PNorm = 65.3147, GNorm = 1.4679, lr_0 = 5.2788e-04
Loss = 2.0993e-01, PNorm = 65.3681, GNorm = 1.2174, lr_0 = 5.3162e-04
Loss = 2.1987e-01, PNorm = 65.4176, GNorm = 0.8888, lr_0 = 5.3538e-04
Loss = 2.1329e-01, PNorm = 65.4704, GNorm = 1.2420, lr_0 = 5.3912e-04
Loss = 2.1463e-01, PNorm = 65.5190, GNorm = 1.0607, lr_0 = 5.4288e-04
Loss = 2.0711e-01, PNorm = 65.5721, GNorm = 0.9094, lr_0 = 5.4663e-04
Loss = 2.2055e-01, PNorm = 65.6271, GNorm = 1.0566, lr_0 = 5.5038e-04
Validation mae = 0.563553
Epoch 1
Loss = 1.4708e-01, PNorm = 65.6829, GNorm = 0.9171, lr_0 = 5.5413e-04
Loss = 1.5096e-01, PNorm = 65.7321, GNorm = 0.6673, lr_0 = 5.5787e-04
Loss = 1.4339e-01, PNorm = 65.7854, GNorm = 0.7262, lr_0 = 5.6163e-04
Loss = 1.5740e-01, PNorm = 65.8316, GNorm = 0.7240, lr_0 = 5.6538e-04
Loss = 1.4773e-01, PNorm = 65.8906, GNorm = 0.6556, lr_0 = 5.6913e-04
Loss = 1.5185e-01, PNorm = 65.9372, GNorm = 0.8499, lr_0 = 5.7288e-04
Loss = 1.3785e-01, PNorm = 65.9948, GNorm = 0.7955, lr_0 = 5.7662e-04
Loss = 1.4668e-01, PNorm = 66.0500, GNorm = 1.0172, lr_0 = 5.8038e-04
Loss = 1.4484e-01, PNorm = 66.1038, GNorm = 0.7884, lr_0 = 5.8413e-04
Loss = 1.3512e-01, PNorm = 66.1591, GNorm = 0.6405, lr_0 = 5.8788e-04
Loss = 1.5165e-01, PNorm = 66.2177, GNorm = 0.8324, lr_0 = 5.9163e-04
Loss = 1.5267e-01, PNorm = 66.2788, GNorm = 0.8492, lr_0 = 5.9538e-04
Loss = 1.6955e-01, PNorm = 66.3381, GNorm = 0.7718, lr_0 = 5.9913e-04
Loss = 1.4556e-01, PNorm = 66.3943, GNorm = 0.7807, lr_0 = 6.0288e-04
Loss = 1.5445e-01, PNorm = 66.4598, GNorm = 0.7455, lr_0 = 6.0663e-04
Loss = 1.5290e-01, PNorm = 66.5241, GNorm = 0.7525, lr_0 = 6.1038e-04
Loss = 1.6201e-01, PNorm = 66.5882, GNorm = 1.1614, lr_0 = 6.1413e-04
Loss = 1.3199e-01, PNorm = 66.6529, GNorm = 1.3178, lr_0 = 6.1788e-04
Loss = 1.3718e-01, PNorm = 66.7195, GNorm = 0.8195, lr_0 = 6.2163e-04
Loss = 1.5835e-01, PNorm = 66.7838, GNorm = 1.0197, lr_0 = 6.2538e-04
Loss = 1.5825e-01, PNorm = 66.8536, GNorm = 0.9897, lr_0 = 6.2913e-04
Loss = 1.4187e-01, PNorm = 66.9159, GNorm = 0.8817, lr_0 = 6.3288e-04
Loss = 1.3428e-01, PNorm = 66.9799, GNorm = 0.8035, lr_0 = 6.3663e-04
Loss = 1.4574e-01, PNorm = 67.0415, GNorm = 0.9283, lr_0 = 6.4038e-04
Loss = 1.5402e-01, PNorm = 67.0996, GNorm = 0.6962, lr_0 = 6.4413e-04
Loss = 1.4198e-01, PNorm = 67.1683, GNorm = 0.6443, lr_0 = 6.4788e-04
Loss = 1.4035e-01, PNorm = 67.2322, GNorm = 0.6316, lr_0 = 6.5163e-04
Loss = 1.5403e-01, PNorm = 67.2998, GNorm = 0.8330, lr_0 = 6.5538e-04
Loss = 1.4912e-01, PNorm = 67.3757, GNorm = 0.7809, lr_0 = 6.5913e-04
Loss = 1.5097e-01, PNorm = 67.4496, GNorm = 1.1198, lr_0 = 6.6288e-04
Loss = 1.6271e-01, PNorm = 67.5316, GNorm = 0.8217, lr_0 = 6.6663e-04
Loss = 1.6645e-01, PNorm = 67.6182, GNorm = 0.9010, lr_0 = 6.7038e-04
Loss = 1.4822e-01, PNorm = 67.7034, GNorm = 1.2855, lr_0 = 6.7413e-04
Loss = 1.6060e-01, PNorm = 67.7926, GNorm = 1.1486, lr_0 = 6.7788e-04
Loss = 1.5903e-01, PNorm = 67.8799, GNorm = 0.7757, lr_0 = 6.8163e-04
Loss = 1.5740e-01, PNorm = 67.9690, GNorm = 0.8552, lr_0 = 6.8538e-04
Loss = 1.5493e-01, PNorm = 68.0515, GNorm = 0.5643, lr_0 = 6.8913e-04
Loss = 1.6955e-01, PNorm = 68.1375, GNorm = 1.2123, lr_0 = 6.9288e-04
Loss = 1.5947e-01, PNorm = 68.2226, GNorm = 1.0177, lr_0 = 6.9663e-04
Loss = 1.7136e-01, PNorm = 68.3032, GNorm = 0.6930, lr_0 = 7.0038e-04
Loss = 1.4013e-01, PNorm = 68.3821, GNorm = 1.1896, lr_0 = 7.0413e-04
Loss = 1.4408e-01, PNorm = 68.4595, GNorm = 0.8317, lr_0 = 7.0788e-04
Loss = 1.4983e-01, PNorm = 68.5335, GNorm = 0.7980, lr_0 = 7.1163e-04
Loss = 1.7644e-01, PNorm = 68.6105, GNorm = 0.8393, lr_0 = 7.1538e-04
Loss = 1.6087e-01, PNorm = 68.6989, GNorm = 0.8835, lr_0 = 7.1913e-04
Loss = 1.4964e-01, PNorm = 68.7839, GNorm = 0.6864, lr_0 = 7.2288e-04
Loss = 1.4833e-01, PNorm = 68.8691, GNorm = 0.6383, lr_0 = 7.2663e-04
Loss = 1.5025e-01, PNorm = 68.9563, GNorm = 0.5669, lr_0 = 7.3038e-04
Loss = 1.6519e-01, PNorm = 69.0469, GNorm = 0.6931, lr_0 = 7.3413e-04
Loss = 1.6854e-01, PNorm = 69.1414, GNorm = 0.7324, lr_0 = 7.3788e-04
Loss = 1.7262e-01, PNorm = 69.2533, GNorm = 0.8092, lr_0 = 7.4163e-04
Loss = 1.8184e-01, PNorm = 69.3466, GNorm = 1.1844, lr_0 = 7.4538e-04
Loss = 1.8299e-01, PNorm = 69.4562, GNorm = 1.0393, lr_0 = 7.4913e-04
Loss = 1.6440e-01, PNorm = 69.5576, GNorm = 1.0665, lr_0 = 7.5288e-04
Loss = 1.7082e-01, PNorm = 69.6631, GNorm = 0.9572, lr_0 = 7.5663e-04
Loss = 1.5820e-01, PNorm = 69.7609, GNorm = 0.6722, lr_0 = 7.6038e-04
Loss = 1.6954e-01, PNorm = 69.8620, GNorm = 1.0155, lr_0 = 7.6413e-04
Loss = 1.7371e-01, PNorm = 69.9471, GNorm = 0.9082, lr_0 = 7.6788e-04
Loss = 1.7936e-01, PNorm = 70.0419, GNorm = 0.8528, lr_0 = 7.7163e-04
Loss = 1.8507e-01, PNorm = 70.1379, GNorm = 0.7061, lr_0 = 7.7538e-04
Loss = 1.8395e-01, PNorm = 70.2397, GNorm = 0.9400, lr_0 = 7.7913e-04
Loss = 1.9941e-01, PNorm = 70.3461, GNorm = 0.8646, lr_0 = 7.8288e-04
Loss = 1.9647e-01, PNorm = 70.4495, GNorm = 0.9914, lr_0 = 7.8663e-04
Loss = 2.0330e-01, PNorm = 70.5555, GNorm = 0.9372, lr_0 = 7.9038e-04
Loss = 1.5725e-01, PNorm = 70.6596, GNorm = 0.7559, lr_0 = 7.9413e-04
Loss = 1.6088e-01, PNorm = 70.7616, GNorm = 0.9759, lr_0 = 7.9788e-04
Loss = 1.5630e-01, PNorm = 70.8627, GNorm = 0.9193, lr_0 = 8.0163e-04
Loss = 1.6310e-01, PNorm = 70.9612, GNorm = 0.6056, lr_0 = 8.0538e-04
Loss = 1.6231e-01, PNorm = 71.0651, GNorm = 0.9209, lr_0 = 8.0913e-04
Loss = 1.6341e-01, PNorm = 71.1500, GNorm = 0.7273, lr_0 = 8.1288e-04
Loss = 1.7334e-01, PNorm = 71.2664, GNorm = 0.8778, lr_0 = 8.1663e-04
Loss = 1.7740e-01, PNorm = 71.3698, GNorm = 0.7414, lr_0 = 8.2038e-04
Loss = 1.7673e-01, PNorm = 71.4767, GNorm = 0.5933, lr_0 = 8.2413e-04
Loss = 1.5480e-01, PNorm = 71.5864, GNorm = 1.3313, lr_0 = 8.2788e-04
Loss = 1.7098e-01, PNorm = 71.6842, GNorm = 1.3071, lr_0 = 8.3163e-04
Loss = 1.7827e-01, PNorm = 71.7909, GNorm = 0.8370, lr_0 = 8.3538e-04
Loss = 1.8084e-01, PNorm = 71.8989, GNorm = 0.8687, lr_0 = 8.3913e-04
Loss = 1.6424e-01, PNorm = 72.0160, GNorm = 0.8306, lr_0 = 8.4288e-04
Loss = 1.6421e-01, PNorm = 72.1145, GNorm = 0.7360, lr_0 = 8.4663e-04
Loss = 1.8959e-01, PNorm = 72.2317, GNorm = 1.1378, lr_0 = 8.5038e-04
Loss = 1.7738e-01, PNorm = 72.3522, GNorm = 0.6080, lr_0 = 8.5413e-04
Loss = 1.3757e-01, PNorm = 72.4724, GNorm = 0.5424, lr_0 = 8.5788e-04
Loss = 1.6035e-01, PNorm = 72.5765, GNorm = 0.6921, lr_0 = 8.6163e-04
Loss = 1.8644e-01, PNorm = 72.6982, GNorm = 0.5303, lr_0 = 8.6538e-04
Loss = 1.8643e-01, PNorm = 72.8209, GNorm = 0.8318, lr_0 = 8.6913e-04
Loss = 1.7188e-01, PNorm = 72.9411, GNorm = 0.9203, lr_0 = 8.7288e-04
Loss = 1.6483e-01, PNorm = 73.0554, GNorm = 1.4133, lr_0 = 8.7663e-04
Loss = 1.6763e-01, PNorm = 73.1665, GNorm = 0.4767, lr_0 = 8.8038e-04
Loss = 1.6020e-01, PNorm = 73.2733, GNorm = 0.9168, lr_0 = 8.8413e-04
Loss = 1.8291e-01, PNorm = 73.3868, GNorm = 0.7762, lr_0 = 8.8788e-04
Loss = 1.6030e-01, PNorm = 73.4988, GNorm = 0.8964, lr_0 = 8.9163e-04
Loss = 1.7389e-01, PNorm = 73.6192, GNorm = 1.0518, lr_0 = 8.9538e-04
Loss = 1.6581e-01, PNorm = 73.7261, GNorm = 0.6935, lr_0 = 8.9913e-04
Loss = 1.8161e-01, PNorm = 73.8620, GNorm = 0.8084, lr_0 = 9.0288e-04
Loss = 1.5443e-01, PNorm = 73.9733, GNorm = 0.6677, lr_0 = 9.0663e-04
Loss = 1.7965e-01, PNorm = 74.0985, GNorm = 0.9904, lr_0 = 9.1038e-04
Loss = 1.8409e-01, PNorm = 74.2234, GNorm = 0.7816, lr_0 = 9.1413e-04
Loss = 1.8236e-01, PNorm = 74.3536, GNorm = 1.3380, lr_0 = 9.1788e-04
Loss = 1.8391e-01, PNorm = 74.4925, GNorm = 0.8253, lr_0 = 9.2163e-04
Loss = 1.9027e-01, PNorm = 74.6231, GNorm = 0.8386, lr_0 = 9.2538e-04
Loss = 1.6131e-01, PNorm = 74.7684, GNorm = 0.6939, lr_0 = 9.2913e-04
Loss = 2.1177e-01, PNorm = 74.9029, GNorm = 0.8423, lr_0 = 9.3288e-04
Loss = 1.9891e-01, PNorm = 75.0667, GNorm = 0.9550, lr_0 = 9.3663e-04
Loss = 1.7240e-01, PNorm = 75.2024, GNorm = 0.7809, lr_0 = 9.4038e-04
Loss = 1.6083e-01, PNorm = 75.3305, GNorm = 0.7758, lr_0 = 9.4413e-04
Loss = 1.7449e-01, PNorm = 75.4558, GNorm = 0.8113, lr_0 = 9.4788e-04
Loss = 1.6294e-01, PNorm = 75.5854, GNorm = 0.8516, lr_0 = 9.5163e-04
Loss = 2.0502e-01, PNorm = 75.7112, GNorm = 0.7854, lr_0 = 9.5538e-04
Loss = 1.6242e-01, PNorm = 75.8284, GNorm = 0.5042, lr_0 = 9.5913e-04
Loss = 1.6747e-01, PNorm = 75.9508, GNorm = 0.6369, lr_0 = 9.6288e-04
Loss = 2.1110e-01, PNorm = 76.0712, GNorm = 0.5587, lr_0 = 9.6663e-04
Loss = 1.7084e-01, PNorm = 76.2083, GNorm = 0.6885, lr_0 = 9.7038e-04
Loss = 1.6885e-01, PNorm = 76.3346, GNorm = 0.8637, lr_0 = 9.7413e-04
Loss = 1.8682e-01, PNorm = 76.4678, GNorm = 0.8806, lr_0 = 9.7788e-04
Loss = 1.9016e-01, PNorm = 76.6124, GNorm = 0.7282, lr_0 = 9.8163e-04
Loss = 1.7246e-01, PNorm = 76.7663, GNorm = 1.6206, lr_0 = 9.8537e-04
Loss = 1.7221e-01, PNorm = 76.9168, GNorm = 1.1474, lr_0 = 9.8912e-04
Loss = 1.8915e-01, PNorm = 77.0581, GNorm = 0.6432, lr_0 = 9.9288e-04
Loss = 1.5713e-01, PNorm = 77.1951, GNorm = 0.6583, lr_0 = 9.9663e-04
Loss = 1.8458e-01, PNorm = 77.3305, GNorm = 0.9449, lr_0 = 9.9993e-04
Validation mae = 0.535793
Epoch 2
Loss = 1.3332e-01, PNorm = 77.4725, GNorm = 0.7441, lr_0 = 9.9925e-04
Loss = 1.1001e-01, PNorm = 77.5918, GNorm = 0.5091, lr_0 = 9.9856e-04
Loss = 1.1615e-01, PNorm = 77.7046, GNorm = 1.1083, lr_0 = 9.9788e-04
Loss = 1.1464e-01, PNorm = 77.8120, GNorm = 0.4746, lr_0 = 9.9719e-04
Loss = 1.0834e-01, PNorm = 77.9243, GNorm = 0.6183, lr_0 = 9.9651e-04
Loss = 1.1508e-01, PNorm = 78.0165, GNorm = 0.5914, lr_0 = 9.9583e-04
Loss = 1.0986e-01, PNorm = 78.1297, GNorm = 1.3291, lr_0 = 9.9515e-04
Loss = 1.0830e-01, PNorm = 78.2213, GNorm = 0.7729, lr_0 = 9.9446e-04
Loss = 1.1381e-01, PNorm = 78.3253, GNorm = 0.4336, lr_0 = 9.9378e-04
Loss = 9.5398e-02, PNorm = 78.4306, GNorm = 1.2665, lr_0 = 9.9310e-04
Loss = 1.0412e-01, PNorm = 78.5226, GNorm = 0.5680, lr_0 = 9.9242e-04
Loss = 1.0527e-01, PNorm = 78.6186, GNorm = 0.5338, lr_0 = 9.9174e-04
Loss = 1.3808e-01, PNorm = 78.7292, GNorm = 0.8491, lr_0 = 9.9106e-04
Loss = 9.3013e-02, PNorm = 78.8363, GNorm = 0.5165, lr_0 = 9.9038e-04
Loss = 1.1373e-01, PNorm = 78.9317, GNorm = 0.5051, lr_0 = 9.8971e-04
Loss = 1.0559e-01, PNorm = 79.0237, GNorm = 0.5068, lr_0 = 9.8903e-04
Loss = 1.0157e-01, PNorm = 79.1111, GNorm = 0.5467, lr_0 = 9.8835e-04
Loss = 1.0106e-01, PNorm = 79.2005, GNorm = 0.4639, lr_0 = 9.8767e-04
Loss = 9.2028e-02, PNorm = 79.2890, GNorm = 0.4322, lr_0 = 9.8700e-04
Loss = 1.0577e-01, PNorm = 79.3638, GNorm = 0.7757, lr_0 = 9.8632e-04
Loss = 1.0025e-01, PNorm = 79.4655, GNorm = 0.3644, lr_0 = 9.8564e-04
Loss = 1.1808e-01, PNorm = 79.5495, GNorm = 0.6208, lr_0 = 9.8497e-04
Loss = 1.0356e-01, PNorm = 79.6403, GNorm = 0.6654, lr_0 = 9.8429e-04
Loss = 9.4827e-02, PNorm = 79.7284, GNorm = 0.5895, lr_0 = 9.8362e-04
Loss = 1.1591e-01, PNorm = 79.8219, GNorm = 0.7121, lr_0 = 9.8295e-04
Loss = 1.0149e-01, PNorm = 79.9298, GNorm = 0.5782, lr_0 = 9.8227e-04
Loss = 9.8534e-02, PNorm = 80.0293, GNorm = 0.5418, lr_0 = 9.8160e-04
Loss = 1.1963e-01, PNorm = 80.1433, GNorm = 0.4952, lr_0 = 9.8093e-04
Loss = 9.4523e-02, PNorm = 80.2429, GNorm = 0.6559, lr_0 = 9.8026e-04
Loss = 9.2564e-02, PNorm = 80.3410, GNorm = 0.7275, lr_0 = 9.7958e-04
Loss = 1.1772e-01, PNorm = 80.4389, GNorm = 0.6937, lr_0 = 9.7891e-04
Loss = 9.6376e-02, PNorm = 80.5452, GNorm = 0.5801, lr_0 = 9.7824e-04
Loss = 1.1214e-01, PNorm = 80.6418, GNorm = 0.8180, lr_0 = 9.7757e-04
Loss = 1.1031e-01, PNorm = 80.7506, GNorm = 0.5143, lr_0 = 9.7690e-04
Loss = 1.0096e-01, PNorm = 80.8556, GNorm = 0.6505, lr_0 = 9.7623e-04
Loss = 1.0306e-01, PNorm = 80.9528, GNorm = 0.4804, lr_0 = 9.7556e-04
Loss = 1.0022e-01, PNorm = 81.0495, GNorm = 0.6093, lr_0 = 9.7490e-04
Loss = 1.2207e-01, PNorm = 81.1463, GNorm = 0.5963, lr_0 = 9.7423e-04
Loss = 1.2004e-01, PNorm = 81.2519, GNorm = 0.5567, lr_0 = 9.7356e-04
Loss = 1.1949e-01, PNorm = 81.3595, GNorm = 0.7651, lr_0 = 9.7289e-04
Loss = 9.0574e-02, PNorm = 81.4599, GNorm = 0.5012, lr_0 = 9.7223e-04
Loss = 1.2543e-01, PNorm = 81.5585, GNorm = 0.5532, lr_0 = 9.7156e-04
Loss = 1.2716e-01, PNorm = 81.6646, GNorm = 0.5777, lr_0 = 9.7090e-04
Loss = 1.2057e-01, PNorm = 81.7763, GNorm = 0.5033, lr_0 = 9.7023e-04
Loss = 1.2296e-01, PNorm = 81.8894, GNorm = 0.7622, lr_0 = 9.6957e-04
Loss = 1.1079e-01, PNorm = 82.0042, GNorm = 0.6746, lr_0 = 9.6890e-04
Loss = 1.0106e-01, PNorm = 82.1163, GNorm = 0.6714, lr_0 = 9.6824e-04
Loss = 1.0360e-01, PNorm = 82.2114, GNorm = 0.4856, lr_0 = 9.6757e-04
Loss = 1.0691e-01, PNorm = 82.3079, GNorm = 0.5905, lr_0 = 9.6691e-04
Loss = 1.0766e-01, PNorm = 82.4015, GNorm = 0.8486, lr_0 = 9.6625e-04
Loss = 1.0649e-01, PNorm = 82.5066, GNorm = 0.5050, lr_0 = 9.6559e-04
Loss = 1.0459e-01, PNorm = 82.6089, GNorm = 0.6643, lr_0 = 9.6493e-04
Loss = 1.1167e-01, PNorm = 82.7160, GNorm = 0.7729, lr_0 = 9.6427e-04
Loss = 1.1887e-01, PNorm = 82.8245, GNorm = 0.5514, lr_0 = 9.6360e-04
Loss = 1.0318e-01, PNorm = 82.9303, GNorm = 0.4939, lr_0 = 9.6294e-04
Loss = 1.0250e-01, PNorm = 83.0289, GNorm = 0.8967, lr_0 = 9.6228e-04
Loss = 1.2144e-01, PNorm = 83.1249, GNorm = 1.8749, lr_0 = 9.6163e-04
Loss = 1.3468e-01, PNorm = 83.2376, GNorm = 0.9601, lr_0 = 9.6097e-04
Loss = 1.2198e-01, PNorm = 83.3545, GNorm = 0.5810, lr_0 = 9.6031e-04
Loss = 1.2595e-01, PNorm = 83.4682, GNorm = 0.4826, lr_0 = 9.5965e-04
Loss = 1.0353e-01, PNorm = 83.5775, GNorm = 0.4609, lr_0 = 9.5899e-04
Loss = 1.1773e-01, PNorm = 83.6806, GNorm = 0.7918, lr_0 = 9.5834e-04
Loss = 1.2829e-01, PNorm = 83.7681, GNorm = 0.7965, lr_0 = 9.5768e-04
Loss = 1.3730e-01, PNorm = 83.8805, GNorm = 0.6420, lr_0 = 9.5702e-04
Loss = 1.1539e-01, PNorm = 83.9966, GNorm = 0.5314, lr_0 = 9.5637e-04
Loss = 1.1911e-01, PNorm = 84.1053, GNorm = 0.4956, lr_0 = 9.5571e-04
Loss = 1.2088e-01, PNorm = 84.2264, GNorm = 0.5027, lr_0 = 9.5506e-04
Loss = 1.1174e-01, PNorm = 84.3335, GNorm = 0.7574, lr_0 = 9.5440e-04
Loss = 1.0043e-01, PNorm = 84.4458, GNorm = 1.0802, lr_0 = 9.5375e-04
Loss = 1.0889e-01, PNorm = 84.5441, GNorm = 0.4404, lr_0 = 9.5310e-04
Loss = 1.0656e-01, PNorm = 84.6427, GNorm = 0.4878, lr_0 = 9.5244e-04
Loss = 1.1224e-01, PNorm = 84.7477, GNorm = 0.5906, lr_0 = 9.5179e-04
Loss = 1.0633e-01, PNorm = 84.8330, GNorm = 0.5575, lr_0 = 9.5114e-04
Loss = 1.0919e-01, PNorm = 84.9375, GNorm = 0.5316, lr_0 = 9.5049e-04
Loss = 1.1466e-01, PNorm = 85.0338, GNorm = 0.5338, lr_0 = 9.4984e-04
Loss = 1.1719e-01, PNorm = 85.1417, GNorm = 1.0761, lr_0 = 9.4919e-04
Loss = 1.3129e-01, PNorm = 85.2451, GNorm = 0.8348, lr_0 = 9.4854e-04
Loss = 1.1493e-01, PNorm = 85.3598, GNorm = 0.5943, lr_0 = 9.4789e-04
Loss = 1.3867e-01, PNorm = 85.4735, GNorm = 0.5112, lr_0 = 9.4724e-04
Loss = 1.0398e-01, PNorm = 85.5934, GNorm = 0.8179, lr_0 = 9.4659e-04
Loss = 1.0606e-01, PNorm = 85.7013, GNorm = 0.5271, lr_0 = 9.4594e-04
Loss = 1.1681e-01, PNorm = 85.8201, GNorm = 0.7164, lr_0 = 9.4529e-04
Loss = 1.0259e-01, PNorm = 85.9234, GNorm = 0.5093, lr_0 = 9.4464e-04
Loss = 1.1178e-01, PNorm = 86.0231, GNorm = 0.6381, lr_0 = 9.4400e-04
Loss = 1.1394e-01, PNorm = 86.1283, GNorm = 0.8433, lr_0 = 9.4335e-04
Loss = 1.1256e-01, PNorm = 86.2312, GNorm = 0.5304, lr_0 = 9.4270e-04
Loss = 1.3051e-01, PNorm = 86.3301, GNorm = 0.6529, lr_0 = 9.4206e-04
Loss = 1.2066e-01, PNorm = 86.4358, GNorm = 0.5480, lr_0 = 9.4141e-04
Loss = 1.0928e-01, PNorm = 86.5394, GNorm = 0.8533, lr_0 = 9.4077e-04
Loss = 1.2942e-01, PNorm = 86.6359, GNorm = 0.7751, lr_0 = 9.4012e-04
Loss = 1.1480e-01, PNorm = 86.7361, GNorm = 0.5501, lr_0 = 9.3948e-04
Loss = 1.2704e-01, PNorm = 86.8352, GNorm = 0.4817, lr_0 = 9.3884e-04
Loss = 1.1217e-01, PNorm = 86.9396, GNorm = 0.5067, lr_0 = 9.3819e-04
Loss = 1.2809e-01, PNorm = 87.0382, GNorm = 0.6525, lr_0 = 9.3755e-04
Loss = 1.2781e-01, PNorm = 87.1446, GNorm = 0.5115, lr_0 = 9.3691e-04
Loss = 1.1594e-01, PNorm = 87.2663, GNorm = 0.6537, lr_0 = 9.3627e-04
Loss = 1.0785e-01, PNorm = 87.3751, GNorm = 0.4716, lr_0 = 9.3562e-04
Loss = 1.1200e-01, PNorm = 87.4840, GNorm = 0.6088, lr_0 = 9.3498e-04
Loss = 1.0122e-01, PNorm = 87.5947, GNorm = 0.7922, lr_0 = 9.3434e-04
Loss = 1.3650e-01, PNorm = 87.7110, GNorm = 0.5588, lr_0 = 9.3370e-04
Loss = 1.3506e-01, PNorm = 87.8252, GNorm = 0.9486, lr_0 = 9.3306e-04
Loss = 1.3161e-01, PNorm = 87.9566, GNorm = 0.4806, lr_0 = 9.3242e-04
Loss = 1.3808e-01, PNorm = 88.0791, GNorm = 0.6493, lr_0 = 9.3178e-04
Loss = 1.1583e-01, PNorm = 88.2104, GNorm = 0.5428, lr_0 = 9.3115e-04
Loss = 1.1077e-01, PNorm = 88.3211, GNorm = 1.0605, lr_0 = 9.3051e-04
Loss = 1.2984e-01, PNorm = 88.4318, GNorm = 1.0930, lr_0 = 9.2987e-04
Loss = 1.1982e-01, PNorm = 88.5436, GNorm = 0.4211, lr_0 = 9.2923e-04
Loss = 1.1100e-01, PNorm = 88.6424, GNorm = 0.5572, lr_0 = 9.2860e-04
Loss = 1.1792e-01, PNorm = 88.7446, GNorm = 0.9456, lr_0 = 9.2796e-04
Loss = 1.1535e-01, PNorm = 88.8498, GNorm = 0.8378, lr_0 = 9.2733e-04
Loss = 1.2006e-01, PNorm = 88.9696, GNorm = 0.5590, lr_0 = 9.2669e-04
Loss = 1.0999e-01, PNorm = 89.0659, GNorm = 0.8117, lr_0 = 9.2606e-04
Loss = 1.0849e-01, PNorm = 89.1688, GNorm = 0.6231, lr_0 = 9.2542e-04
Loss = 1.1967e-01, PNorm = 89.2668, GNorm = 0.9990, lr_0 = 9.2479e-04
Loss = 1.1596e-01, PNorm = 89.3683, GNorm = 0.6907, lr_0 = 9.2415e-04
Loss = 1.2201e-01, PNorm = 89.4633, GNorm = 0.4285, lr_0 = 9.2352e-04
Loss = 1.1867e-01, PNorm = 89.5758, GNorm = 0.7718, lr_0 = 9.2289e-04
Loss = 1.0819e-01, PNorm = 89.6824, GNorm = 0.7332, lr_0 = 9.2226e-04
Loss = 1.1766e-01, PNorm = 89.7921, GNorm = 0.6032, lr_0 = 9.2162e-04
Loss = 1.1379e-01, PNorm = 89.8984, GNorm = 0.4339, lr_0 = 9.2099e-04
Validation mae = 0.509775
Epoch 3
Loss = 6.6486e-02, PNorm = 89.9865, GNorm = 0.4991, lr_0 = 9.2036e-04
Loss = 6.8016e-02, PNorm = 90.0604, GNorm = 0.3517, lr_0 = 9.1973e-04
Loss = 5.9526e-02, PNorm = 90.1238, GNorm = 0.3555, lr_0 = 9.1910e-04
Loss = 6.5083e-02, PNorm = 90.1918, GNorm = 0.5278, lr_0 = 9.1847e-04
Loss = 5.8125e-02, PNorm = 90.2493, GNorm = 0.7703, lr_0 = 9.1784e-04
Loss = 6.2859e-02, PNorm = 90.3132, GNorm = 0.4972, lr_0 = 9.1721e-04
Loss = 7.0133e-02, PNorm = 90.3749, GNorm = 0.3701, lr_0 = 9.1658e-04
Loss = 6.8814e-02, PNorm = 90.4446, GNorm = 0.4661, lr_0 = 9.1596e-04
Loss = 6.8022e-02, PNorm = 90.5008, GNorm = 0.7732, lr_0 = 9.1533e-04
Loss = 6.1701e-02, PNorm = 90.5616, GNorm = 0.3887, lr_0 = 9.1470e-04
Loss = 6.5035e-02, PNorm = 90.6258, GNorm = 0.7498, lr_0 = 9.1408e-04
Loss = 5.9315e-02, PNorm = 90.6850, GNorm = 0.5534, lr_0 = 9.1345e-04
Loss = 6.1770e-02, PNorm = 90.7366, GNorm = 0.7653, lr_0 = 9.1282e-04
Loss = 6.5931e-02, PNorm = 90.8027, GNorm = 0.5800, lr_0 = 9.1220e-04
Loss = 7.1009e-02, PNorm = 90.8698, GNorm = 0.8442, lr_0 = 9.1157e-04
Loss = 6.3930e-02, PNorm = 90.9359, GNorm = 0.3840, lr_0 = 9.1095e-04
Loss = 6.9393e-02, PNorm = 91.0000, GNorm = 0.3389, lr_0 = 9.1032e-04
Loss = 5.9541e-02, PNorm = 91.0716, GNorm = 0.4663, lr_0 = 9.0970e-04
Loss = 5.3422e-02, PNorm = 91.1308, GNorm = 0.4192, lr_0 = 9.0908e-04
Loss = 6.9849e-02, PNorm = 91.1883, GNorm = 0.5974, lr_0 = 9.0846e-04
Loss = 6.8083e-02, PNorm = 91.2561, GNorm = 0.4694, lr_0 = 9.0783e-04
Loss = 6.7631e-02, PNorm = 91.3216, GNorm = 0.5741, lr_0 = 9.0721e-04
Loss = 6.5433e-02, PNorm = 91.3881, GNorm = 0.5890, lr_0 = 9.0659e-04
Loss = 7.2487e-02, PNorm = 91.4546, GNorm = 0.6109, lr_0 = 9.0597e-04
Loss = 6.5233e-02, PNorm = 91.5299, GNorm = 0.3943, lr_0 = 9.0535e-04
Loss = 6.4701e-02, PNorm = 91.6003, GNorm = 0.3579, lr_0 = 9.0473e-04
Loss = 5.9729e-02, PNorm = 91.6624, GNorm = 0.5896, lr_0 = 9.0411e-04
Loss = 5.7291e-02, PNorm = 91.7266, GNorm = 0.3208, lr_0 = 9.0349e-04
Loss = 5.6523e-02, PNorm = 91.7899, GNorm = 0.4731, lr_0 = 9.0287e-04
Loss = 7.1185e-02, PNorm = 91.8530, GNorm = 0.9410, lr_0 = 9.0225e-04
Loss = 7.3695e-02, PNorm = 91.9171, GNorm = 0.4355, lr_0 = 9.0163e-04
Loss = 8.1773e-02, PNorm = 92.0003, GNorm = 0.3269, lr_0 = 9.0102e-04
Loss = 6.5166e-02, PNorm = 92.0682, GNorm = 0.5068, lr_0 = 9.0040e-04
Loss = 6.9117e-02, PNorm = 92.1416, GNorm = 0.7123, lr_0 = 8.9978e-04
Loss = 6.7591e-02, PNorm = 92.2124, GNorm = 0.4481, lr_0 = 8.9916e-04
Loss = 7.9169e-02, PNorm = 92.2808, GNorm = 0.5448, lr_0 = 8.9855e-04
Loss = 6.3934e-02, PNorm = 92.3490, GNorm = 0.4456, lr_0 = 8.9793e-04
Loss = 6.2983e-02, PNorm = 92.4176, GNorm = 0.7769, lr_0 = 8.9732e-04
Loss = 7.1040e-02, PNorm = 92.4900, GNorm = 0.4308, lr_0 = 8.9670e-04
Loss = 7.1026e-02, PNorm = 92.5621, GNorm = 0.4351, lr_0 = 8.9609e-04
Loss = 7.9964e-02, PNorm = 92.6339, GNorm = 1.3261, lr_0 = 8.9548e-04
Loss = 6.7269e-02, PNorm = 92.7082, GNorm = 0.6554, lr_0 = 8.9486e-04
Loss = 7.2240e-02, PNorm = 92.7832, GNorm = 0.5712, lr_0 = 8.9425e-04
Loss = 7.6775e-02, PNorm = 92.8604, GNorm = 0.5136, lr_0 = 8.9364e-04
Loss = 6.3780e-02, PNorm = 92.9416, GNorm = 0.3962, lr_0 = 8.9302e-04
Loss = 6.5403e-02, PNorm = 93.0031, GNorm = 0.6133, lr_0 = 8.9241e-04
Loss = 6.7080e-02, PNorm = 93.0884, GNorm = 0.4249, lr_0 = 8.9180e-04
Loss = 6.7401e-02, PNorm = 93.1774, GNorm = 0.4835, lr_0 = 8.9119e-04
Loss = 7.2634e-02, PNorm = 93.2437, GNorm = 0.7987, lr_0 = 8.9058e-04
Loss = 6.7619e-02, PNorm = 93.3234, GNorm = 0.4157, lr_0 = 8.8997e-04
Loss = 7.2568e-02, PNorm = 93.4003, GNorm = 0.6546, lr_0 = 8.8936e-04
Loss = 6.6748e-02, PNorm = 93.4777, GNorm = 0.6426, lr_0 = 8.8875e-04
Loss = 8.4178e-02, PNorm = 93.5553, GNorm = 0.7156, lr_0 = 8.8814e-04
Loss = 7.9400e-02, PNorm = 93.6327, GNorm = 1.5835, lr_0 = 8.8753e-04
Loss = 6.8186e-02, PNorm = 93.7147, GNorm = 0.3421, lr_0 = 8.8693e-04
Loss = 7.4410e-02, PNorm = 93.7987, GNorm = 0.7436, lr_0 = 8.8632e-04
Loss = 7.6265e-02, PNorm = 93.8795, GNorm = 1.4395, lr_0 = 8.8571e-04
Loss = 6.4801e-02, PNorm = 93.9678, GNorm = 0.3796, lr_0 = 8.8510e-04
Loss = 7.0709e-02, PNorm = 94.0451, GNorm = 0.4084, lr_0 = 8.8450e-04
Loss = 7.4980e-02, PNorm = 94.1364, GNorm = 0.4396, lr_0 = 8.8389e-04
Loss = 8.4563e-02, PNorm = 94.2200, GNorm = 0.7591, lr_0 = 8.8329e-04
Loss = 7.0239e-02, PNorm = 94.3133, GNorm = 0.5133, lr_0 = 8.8268e-04
Loss = 7.7004e-02, PNorm = 94.3978, GNorm = 0.6384, lr_0 = 8.8208e-04
Loss = 6.6859e-02, PNorm = 94.4829, GNorm = 0.4599, lr_0 = 8.8147e-04
Loss = 7.1200e-02, PNorm = 94.5617, GNorm = 0.3807, lr_0 = 8.8087e-04
Loss = 7.8690e-02, PNorm = 94.6406, GNorm = 0.4632, lr_0 = 8.8026e-04
Loss = 7.4467e-02, PNorm = 94.7282, GNorm = 0.7063, lr_0 = 8.7966e-04
Loss = 7.0315e-02, PNorm = 94.8037, GNorm = 0.3937, lr_0 = 8.7906e-04
Loss = 7.0630e-02, PNorm = 94.8935, GNorm = 0.9131, lr_0 = 8.7846e-04
Loss = 6.1997e-02, PNorm = 94.9815, GNorm = 0.3700, lr_0 = 8.7785e-04
Loss = 6.7963e-02, PNorm = 95.0692, GNorm = 0.7818, lr_0 = 8.7725e-04
Loss = 6.9061e-02, PNorm = 95.1452, GNorm = 0.4890, lr_0 = 8.7665e-04
Loss = 7.4104e-02, PNorm = 95.2316, GNorm = 0.3562, lr_0 = 8.7605e-04
Loss = 6.4181e-02, PNorm = 95.3136, GNorm = 0.4496, lr_0 = 8.7545e-04
Loss = 7.5356e-02, PNorm = 95.3913, GNorm = 0.4224, lr_0 = 8.7485e-04
Loss = 7.0792e-02, PNorm = 95.4650, GNorm = 0.6629, lr_0 = 8.7425e-04
Loss = 6.8812e-02, PNorm = 95.5362, GNorm = 0.7171, lr_0 = 8.7365e-04
Loss = 7.0765e-02, PNorm = 95.6244, GNorm = 0.6528, lr_0 = 8.7306e-04
Loss = 6.1077e-02, PNorm = 95.7031, GNorm = 0.4494, lr_0 = 8.7246e-04
Loss = 6.5359e-02, PNorm = 95.7898, GNorm = 0.3690, lr_0 = 8.7186e-04
Loss = 7.8002e-02, PNorm = 95.8656, GNorm = 0.4669, lr_0 = 8.7126e-04
Loss = 6.7716e-02, PNorm = 95.9430, GNorm = 0.8664, lr_0 = 8.7067e-04
Loss = 7.3852e-02, PNorm = 96.0343, GNorm = 0.6077, lr_0 = 8.7007e-04
Loss = 7.0329e-02, PNorm = 96.1155, GNorm = 0.6213, lr_0 = 8.6947e-04
Loss = 8.5450e-02, PNorm = 96.2024, GNorm = 0.7585, lr_0 = 8.6888e-04
Loss = 7.8965e-02, PNorm = 96.2922, GNorm = 0.5602, lr_0 = 8.6828e-04
Loss = 7.1438e-02, PNorm = 96.3859, GNorm = 0.3968, lr_0 = 8.6769e-04
Loss = 6.5440e-02, PNorm = 96.4669, GNorm = 0.6645, lr_0 = 8.6709e-04
Loss = 7.8353e-02, PNorm = 96.5556, GNorm = 0.7612, lr_0 = 8.6650e-04
Loss = 7.4395e-02, PNorm = 96.6508, GNorm = 0.9992, lr_0 = 8.6590e-04
Loss = 7.1765e-02, PNorm = 96.7415, GNorm = 0.6032, lr_0 = 8.6531e-04
Loss = 8.7390e-02, PNorm = 96.8348, GNorm = 0.9687, lr_0 = 8.6472e-04
Loss = 7.2853e-02, PNorm = 96.9247, GNorm = 0.5263, lr_0 = 8.6413e-04
Loss = 8.0060e-02, PNorm = 97.0180, GNorm = 0.6256, lr_0 = 8.6353e-04
Loss = 6.9206e-02, PNorm = 97.1015, GNorm = 1.1372, lr_0 = 8.6294e-04
Loss = 6.7758e-02, PNorm = 97.1910, GNorm = 0.4328, lr_0 = 8.6235e-04
Loss = 7.5310e-02, PNorm = 97.2656, GNorm = 0.3125, lr_0 = 8.6176e-04
Loss = 6.6376e-02, PNorm = 97.3505, GNorm = 1.1581, lr_0 = 8.6117e-04
Loss = 8.4768e-02, PNorm = 97.4318, GNorm = 0.4182, lr_0 = 8.6058e-04
Loss = 7.7852e-02, PNorm = 97.5253, GNorm = 0.3645, lr_0 = 8.5999e-04
Loss = 7.3011e-02, PNorm = 97.6153, GNorm = 0.5139, lr_0 = 8.5940e-04
Loss = 7.2381e-02, PNorm = 97.7058, GNorm = 0.4929, lr_0 = 8.5881e-04
Loss = 8.2898e-02, PNorm = 97.7907, GNorm = 0.5696, lr_0 = 8.5823e-04
Loss = 6.7900e-02, PNorm = 97.8866, GNorm = 0.7903, lr_0 = 8.5764e-04
Loss = 8.5765e-02, PNorm = 97.9747, GNorm = 1.0676, lr_0 = 8.5705e-04
Loss = 8.3398e-02, PNorm = 98.0870, GNorm = 0.7184, lr_0 = 8.5646e-04
Loss = 7.5852e-02, PNorm = 98.1845, GNorm = 0.5553, lr_0 = 8.5588e-04
Loss = 7.2175e-02, PNorm = 98.2852, GNorm = 0.6223, lr_0 = 8.5529e-04
Loss = 6.9362e-02, PNorm = 98.3718, GNorm = 0.3772, lr_0 = 8.5470e-04
Loss = 8.0776e-02, PNorm = 98.4607, GNorm = 0.4001, lr_0 = 8.5412e-04
Loss = 7.7975e-02, PNorm = 98.5532, GNorm = 0.9456, lr_0 = 8.5353e-04
Loss = 7.2531e-02, PNorm = 98.6422, GNorm = 0.3842, lr_0 = 8.5295e-04
Loss = 7.2758e-02, PNorm = 98.7318, GNorm = 0.4194, lr_0 = 8.5236e-04
Loss = 8.3567e-02, PNorm = 98.8263, GNorm = 0.2945, lr_0 = 8.5178e-04
Loss = 5.9858e-02, PNorm = 98.9099, GNorm = 0.6128, lr_0 = 8.5120e-04
Loss = 7.6523e-02, PNorm = 98.9968, GNorm = 0.6273, lr_0 = 8.5061e-04
Loss = 7.0326e-02, PNorm = 99.0726, GNorm = 0.4439, lr_0 = 8.5003e-04
Loss = 8.0340e-02, PNorm = 99.1554, GNorm = 0.7472, lr_0 = 8.4945e-04
Loss = 8.0775e-02, PNorm = 99.2438, GNorm = 0.7972, lr_0 = 8.4887e-04
Loss = 7.5360e-02, PNorm = 99.3338, GNorm = 0.5286, lr_0 = 8.4828e-04
Validation mae = 0.501939
Epoch 4
Loss = 5.1133e-02, PNorm = 99.4105, GNorm = 0.5497, lr_0 = 8.4770e-04
Loss = 5.1136e-02, PNorm = 99.4775, GNorm = 0.2482, lr_0 = 8.4712e-04
Loss = 4.3131e-02, PNorm = 99.5387, GNorm = 0.7630, lr_0 = 8.4654e-04
Loss = 4.6358e-02, PNorm = 99.5911, GNorm = 0.4175, lr_0 = 8.4596e-04
Loss = 4.2403e-02, PNorm = 99.6489, GNorm = 0.3492, lr_0 = 8.4538e-04
Loss = 4.5163e-02, PNorm = 99.7001, GNorm = 0.3263, lr_0 = 8.4480e-04
Loss = 4.2894e-02, PNorm = 99.7490, GNorm = 0.6580, lr_0 = 8.4423e-04
Loss = 5.0979e-02, PNorm = 99.7931, GNorm = 0.2894, lr_0 = 8.4365e-04
Loss = 4.7125e-02, PNorm = 99.8515, GNorm = 0.5258, lr_0 = 8.4307e-04
Loss = 5.6522e-02, PNorm = 99.9060, GNorm = 0.3519, lr_0 = 8.4249e-04
Loss = 4.8578e-02, PNorm = 99.9663, GNorm = 0.6170, lr_0 = 8.4191e-04
Loss = 4.8575e-02, PNorm = 100.0116, GNorm = 0.4034, lr_0 = 8.4134e-04
Loss = 4.2114e-02, PNorm = 100.0636, GNorm = 0.4505, lr_0 = 8.4076e-04
Loss = 5.2607e-02, PNorm = 100.1151, GNorm = 0.4769, lr_0 = 8.4019e-04
Loss = 4.6292e-02, PNorm = 100.1779, GNorm = 0.4343, lr_0 = 8.3961e-04
Loss = 4.5577e-02, PNorm = 100.2362, GNorm = 0.6103, lr_0 = 8.3903e-04
Loss = 4.5151e-02, PNorm = 100.2879, GNorm = 0.2932, lr_0 = 8.3846e-04
Loss = 4.6859e-02, PNorm = 100.3453, GNorm = 0.2885, lr_0 = 8.3789e-04
Loss = 4.0383e-02, PNorm = 100.3946, GNorm = 0.4451, lr_0 = 8.3731e-04
Loss = 4.8557e-02, PNorm = 100.4407, GNorm = 0.3562, lr_0 = 8.3674e-04
Loss = 4.3641e-02, PNorm = 100.4917, GNorm = 0.3997, lr_0 = 8.3616e-04
Loss = 4.4080e-02, PNorm = 100.5484, GNorm = 0.3326, lr_0 = 8.3559e-04
Loss = 3.9629e-02, PNorm = 100.6057, GNorm = 0.6364, lr_0 = 8.3502e-04
Loss = 4.2165e-02, PNorm = 100.6623, GNorm = 1.0471, lr_0 = 8.3445e-04
Loss = 4.0087e-02, PNorm = 100.7139, GNorm = 0.3838, lr_0 = 8.3388e-04
Loss = 4.5778e-02, PNorm = 100.7642, GNorm = 0.3864, lr_0 = 8.3330e-04
Loss = 5.1923e-02, PNorm = 100.8189, GNorm = 0.3278, lr_0 = 8.3273e-04
Loss = 4.0812e-02, PNorm = 100.8712, GNorm = 0.6707, lr_0 = 8.3216e-04
Loss = 4.5677e-02, PNorm = 100.9268, GNorm = 0.5394, lr_0 = 8.3159e-04
Loss = 4.6158e-02, PNorm = 100.9857, GNorm = 1.0066, lr_0 = 8.3102e-04
Loss = 4.1118e-02, PNorm = 101.0437, GNorm = 0.6218, lr_0 = 8.3045e-04
Loss = 5.3064e-02, PNorm = 101.0976, GNorm = 0.6947, lr_0 = 8.2988e-04
Loss = 4.3648e-02, PNorm = 101.1554, GNorm = 0.7034, lr_0 = 8.2932e-04
Loss = 4.6079e-02, PNorm = 101.2215, GNorm = 0.7879, lr_0 = 8.2875e-04
Loss = 4.5709e-02, PNorm = 101.2776, GNorm = 0.2901, lr_0 = 8.2818e-04
Loss = 4.6669e-02, PNorm = 101.3328, GNorm = 0.3498, lr_0 = 8.2761e-04
Loss = 4.3913e-02, PNorm = 101.3918, GNorm = 0.4225, lr_0 = 8.2705e-04
Loss = 4.5112e-02, PNorm = 101.4476, GNorm = 0.5913, lr_0 = 8.2648e-04
Loss = 4.8447e-02, PNorm = 101.5160, GNorm = 0.2780, lr_0 = 8.2591e-04
Loss = 5.0059e-02, PNorm = 101.5754, GNorm = 0.6857, lr_0 = 8.2535e-04
Loss = 5.0480e-02, PNorm = 101.6468, GNorm = 0.5339, lr_0 = 8.2478e-04
Loss = 4.7757e-02, PNorm = 101.7046, GNorm = 0.4546, lr_0 = 8.2422e-04
Loss = 4.8337e-02, PNorm = 101.7670, GNorm = 0.4065, lr_0 = 8.2365e-04
Loss = 4.9886e-02, PNorm = 101.8285, GNorm = 0.5409, lr_0 = 8.2309e-04
Loss = 4.9441e-02, PNorm = 101.8953, GNorm = 0.4640, lr_0 = 8.2252e-04
Loss = 4.1863e-02, PNorm = 101.9598, GNorm = 0.3362, lr_0 = 8.2196e-04
Loss = 5.3721e-02, PNorm = 102.0287, GNorm = 0.9233, lr_0 = 8.2140e-04
Loss = 4.9313e-02, PNorm = 102.0961, GNorm = 0.9906, lr_0 = 8.2084e-04
Loss = 4.8117e-02, PNorm = 102.1694, GNorm = 0.4551, lr_0 = 8.2027e-04
Loss = 5.2440e-02, PNorm = 102.2284, GNorm = 0.2996, lr_0 = 8.1971e-04
Loss = 4.3210e-02, PNorm = 102.2941, GNorm = 0.7838, lr_0 = 8.1915e-04
Loss = 5.1290e-02, PNorm = 102.3586, GNorm = 0.2708, lr_0 = 8.1859e-04
Loss = 4.7566e-02, PNorm = 102.4230, GNorm = 0.4180, lr_0 = 8.1803e-04
Loss = 4.3881e-02, PNorm = 102.4859, GNorm = 0.4983, lr_0 = 8.1747e-04
Loss = 5.5227e-02, PNorm = 102.5428, GNorm = 0.3268, lr_0 = 8.1691e-04
Loss = 4.3265e-02, PNorm = 102.6018, GNorm = 0.9556, lr_0 = 8.1635e-04
Loss = 4.8957e-02, PNorm = 102.6654, GNorm = 0.5300, lr_0 = 8.1579e-04
Loss = 5.0105e-02, PNorm = 102.7316, GNorm = 0.4469, lr_0 = 8.1523e-04
Loss = 5.1396e-02, PNorm = 102.7939, GNorm = 1.1441, lr_0 = 8.1467e-04
Loss = 4.3402e-02, PNorm = 102.8623, GNorm = 0.4258, lr_0 = 8.1411e-04
Loss = 4.7530e-02, PNorm = 102.9320, GNorm = 0.5117, lr_0 = 8.1355e-04
Loss = 4.9593e-02, PNorm = 103.0010, GNorm = 0.4878, lr_0 = 8.1300e-04
Loss = 4.2684e-02, PNorm = 103.0673, GNorm = 0.6042, lr_0 = 8.1244e-04
Loss = 4.8364e-02, PNorm = 103.1322, GNorm = 0.3331, lr_0 = 8.1188e-04
Loss = 4.7285e-02, PNorm = 103.1987, GNorm = 0.6731, lr_0 = 8.1133e-04
Loss = 5.4365e-02, PNorm = 103.2770, GNorm = 0.4281, lr_0 = 8.1077e-04
Loss = 4.6835e-02, PNorm = 103.3582, GNorm = 0.4826, lr_0 = 8.1022e-04
Loss = 5.1447e-02, PNorm = 103.4212, GNorm = 0.3312, lr_0 = 8.0966e-04
Loss = 5.2006e-02, PNorm = 103.4860, GNorm = 0.2936, lr_0 = 8.0911e-04
Loss = 4.8531e-02, PNorm = 103.5610, GNorm = 0.5820, lr_0 = 8.0855e-04
Loss = 5.3637e-02, PNorm = 103.6330, GNorm = 0.6066, lr_0 = 8.0800e-04
Loss = 5.0311e-02, PNorm = 103.7064, GNorm = 0.5881, lr_0 = 8.0745e-04
Loss = 5.3105e-02, PNorm = 103.7810, GNorm = 0.6257, lr_0 = 8.0689e-04
Loss = 5.3765e-02, PNorm = 103.8477, GNorm = 0.5829, lr_0 = 8.0634e-04
Loss = 5.9402e-02, PNorm = 103.9247, GNorm = 0.3691, lr_0 = 8.0579e-04
Loss = 5.3473e-02, PNorm = 103.9918, GNorm = 0.5282, lr_0 = 8.0523e-04
Loss = 5.7418e-02, PNorm = 104.0653, GNorm = 0.3157, lr_0 = 8.0468e-04
Loss = 5.6938e-02, PNorm = 104.1463, GNorm = 0.4690, lr_0 = 8.0413e-04
Loss = 5.7604e-02, PNorm = 104.2330, GNorm = 0.5289, lr_0 = 8.0358e-04
Loss = 5.2043e-02, PNorm = 104.3122, GNorm = 0.5906, lr_0 = 8.0303e-04
Loss = 5.0640e-02, PNorm = 104.3896, GNorm = 0.6840, lr_0 = 8.0248e-04
Loss = 5.4319e-02, PNorm = 104.4608, GNorm = 0.3937, lr_0 = 8.0193e-04
Loss = 5.5782e-02, PNorm = 104.5360, GNorm = 0.4328, lr_0 = 8.0138e-04
Loss = 4.5158e-02, PNorm = 104.6095, GNorm = 0.4466, lr_0 = 8.0083e-04
Loss = 5.7824e-02, PNorm = 104.6895, GNorm = 0.5368, lr_0 = 8.0028e-04
Loss = 5.4165e-02, PNorm = 104.7758, GNorm = 0.5087, lr_0 = 7.9974e-04
Loss = 5.7477e-02, PNorm = 104.8559, GNorm = 0.4297, lr_0 = 7.9919e-04
Loss = 4.7284e-02, PNorm = 104.9376, GNorm = 0.3692, lr_0 = 7.9864e-04
Loss = 4.8973e-02, PNorm = 105.0163, GNorm = 0.3758, lr_0 = 7.9809e-04
Loss = 5.3181e-02, PNorm = 105.0895, GNorm = 0.4130, lr_0 = 7.9755e-04
Loss = 5.0331e-02, PNorm = 105.1712, GNorm = 0.5370, lr_0 = 7.9700e-04
Loss = 5.0112e-02, PNorm = 105.2437, GNorm = 0.4111, lr_0 = 7.9645e-04
Loss = 6.9491e-02, PNorm = 105.3299, GNorm = 0.2911, lr_0 = 7.9591e-04
Loss = 5.0777e-02, PNorm = 105.4030, GNorm = 0.4685, lr_0 = 7.9536e-04
Loss = 5.7584e-02, PNorm = 105.4935, GNorm = 1.1786, lr_0 = 7.9482e-04
Loss = 4.7318e-02, PNorm = 105.5826, GNorm = 0.4681, lr_0 = 7.9427e-04
Loss = 4.9455e-02, PNorm = 105.6586, GNorm = 0.2952, lr_0 = 7.9373e-04
Loss = 5.6241e-02, PNorm = 105.7265, GNorm = 0.3958, lr_0 = 7.9319e-04
Loss = 4.8052e-02, PNorm = 105.7981, GNorm = 0.3462, lr_0 = 7.9264e-04
Loss = 5.1309e-02, PNorm = 105.8671, GNorm = 0.3126, lr_0 = 7.9210e-04
Loss = 5.5009e-02, PNorm = 105.9521, GNorm = 0.6894, lr_0 = 7.9156e-04
Loss = 6.0458e-02, PNorm = 106.0396, GNorm = 0.5084, lr_0 = 7.9101e-04
Loss = 5.6101e-02, PNorm = 106.1243, GNorm = 0.5976, lr_0 = 7.9047e-04
Loss = 5.1241e-02, PNorm = 106.2113, GNorm = 0.3651, lr_0 = 7.8993e-04
Loss = 5.4742e-02, PNorm = 106.2917, GNorm = 0.8023, lr_0 = 7.8939e-04
Loss = 5.4822e-02, PNorm = 106.3716, GNorm = 0.6868, lr_0 = 7.8885e-04
Loss = 5.8283e-02, PNorm = 106.4590, GNorm = 0.3846, lr_0 = 7.8831e-04
Loss = 5.5534e-02, PNorm = 106.5517, GNorm = 0.3538, lr_0 = 7.8777e-04
Loss = 5.8490e-02, PNorm = 106.6298, GNorm = 0.3041, lr_0 = 7.8723e-04
Loss = 5.1911e-02, PNorm = 106.7130, GNorm = 0.2401, lr_0 = 7.8669e-04
Loss = 5.0587e-02, PNorm = 106.7853, GNorm = 0.5309, lr_0 = 7.8615e-04
Loss = 5.5290e-02, PNorm = 106.8610, GNorm = 0.7839, lr_0 = 7.8561e-04
Loss = 6.2197e-02, PNorm = 106.9375, GNorm = 0.5857, lr_0 = 7.8507e-04
Loss = 5.5219e-02, PNorm = 107.0291, GNorm = 0.3558, lr_0 = 7.8454e-04
Loss = 5.6077e-02, PNorm = 107.1073, GNorm = 0.6114, lr_0 = 7.8400e-04
Loss = 4.3484e-02, PNorm = 107.1815, GNorm = 0.5215, lr_0 = 7.8346e-04
Loss = 4.9305e-02, PNorm = 107.2501, GNorm = 0.3346, lr_0 = 7.8293e-04
Loss = 6.1248e-02, PNorm = 107.3376, GNorm = 0.3192, lr_0 = 7.8239e-04
Loss = 5.1574e-02, PNorm = 107.4321, GNorm = 0.4677, lr_0 = 7.8185e-04
Loss = 5.8851e-02, PNorm = 107.5189, GNorm = 0.3042, lr_0 = 7.8132e-04
Validation mae = 0.496986
Epoch 5
Loss = 4.0692e-02, PNorm = 107.5892, GNorm = 0.2846, lr_0 = 7.8078e-04
Loss = 3.8564e-02, PNorm = 107.6434, GNorm = 0.2635, lr_0 = 7.8025e-04
Loss = 3.7036e-02, PNorm = 107.6937, GNorm = 0.2950, lr_0 = 7.7971e-04
Loss = 3.4464e-02, PNorm = 107.7454, GNorm = 0.2934, lr_0 = 7.7918e-04
Loss = 4.0165e-02, PNorm = 107.7928, GNorm = 0.3971, lr_0 = 7.7864e-04
Loss = 3.5289e-02, PNorm = 107.8448, GNorm = 0.3077, lr_0 = 7.7811e-04
Loss = 3.4099e-02, PNorm = 107.8973, GNorm = 0.2482, lr_0 = 7.7758e-04
Loss = 3.3378e-02, PNorm = 107.9485, GNorm = 0.3634, lr_0 = 7.7705e-04
Loss = 3.1580e-02, PNorm = 107.9959, GNorm = 0.3009, lr_0 = 7.7651e-04
Loss = 3.9537e-02, PNorm = 108.0473, GNorm = 0.2390, lr_0 = 7.7598e-04
Loss = 3.4559e-02, PNorm = 108.0970, GNorm = 0.2694, lr_0 = 7.7545e-04
Loss = 3.4249e-02, PNorm = 108.1442, GNorm = 0.3727, lr_0 = 7.7492e-04
Loss = 3.7830e-02, PNorm = 108.1987, GNorm = 0.2879, lr_0 = 7.7439e-04
Loss = 3.4966e-02, PNorm = 108.2463, GNorm = 0.2961, lr_0 = 7.7386e-04
Loss = 3.4579e-02, PNorm = 108.3027, GNorm = 0.7378, lr_0 = 7.7333e-04
Loss = 3.3607e-02, PNorm = 108.3513, GNorm = 0.5773, lr_0 = 7.7280e-04
Loss = 3.2095e-02, PNorm = 108.4014, GNorm = 0.3794, lr_0 = 7.7227e-04
Loss = 3.1639e-02, PNorm = 108.4402, GNorm = 0.6861, lr_0 = 7.7174e-04
Loss = 3.6001e-02, PNorm = 108.4827, GNorm = 0.4616, lr_0 = 7.7121e-04
Loss = 3.2398e-02, PNorm = 108.5280, GNorm = 0.5108, lr_0 = 7.7068e-04
Loss = 2.9687e-02, PNorm = 108.5735, GNorm = 0.5377, lr_0 = 7.7015e-04
Loss = 3.3612e-02, PNorm = 108.6124, GNorm = 0.4212, lr_0 = 7.6963e-04
Loss = 3.6546e-02, PNorm = 108.6586, GNorm = 0.3384, lr_0 = 7.6910e-04
Loss = 3.0354e-02, PNorm = 108.7082, GNorm = 0.3658, lr_0 = 7.6857e-04
Loss = 3.0417e-02, PNorm = 108.7590, GNorm = 0.2487, lr_0 = 7.6805e-04
Loss = 3.3929e-02, PNorm = 108.8086, GNorm = 0.2404, lr_0 = 7.6752e-04
Loss = 3.7827e-02, PNorm = 108.8552, GNorm = 0.4035, lr_0 = 7.6699e-04
Loss = 3.8965e-02, PNorm = 108.9026, GNorm = 0.5859, lr_0 = 7.6647e-04
Loss = 3.1114e-02, PNorm = 108.9572, GNorm = 0.4300, lr_0 = 7.6594e-04
Loss = 3.6458e-02, PNorm = 109.0009, GNorm = 0.3462, lr_0 = 7.6542e-04
Loss = 3.0633e-02, PNorm = 109.0427, GNorm = 0.4374, lr_0 = 7.6489e-04
Loss = 3.8738e-02, PNorm = 109.0978, GNorm = 0.3025, lr_0 = 7.6437e-04
Loss = 3.4251e-02, PNorm = 109.1530, GNorm = 0.2762, lr_0 = 7.6385e-04
Loss = 3.3039e-02, PNorm = 109.2149, GNorm = 0.4952, lr_0 = 7.6332e-04
Loss = 4.0767e-02, PNorm = 109.2649, GNorm = 0.5443, lr_0 = 7.6280e-04
Loss = 3.3555e-02, PNorm = 109.3183, GNorm = 0.2919, lr_0 = 7.6228e-04
Loss = 4.2414e-02, PNorm = 109.3748, GNorm = 0.5882, lr_0 = 7.6176e-04
Loss = 4.0269e-02, PNorm = 109.4314, GNorm = 0.8323, lr_0 = 7.6123e-04
Loss = 3.4918e-02, PNorm = 109.4896, GNorm = 0.5102, lr_0 = 7.6071e-04
Loss = 3.4253e-02, PNorm = 109.5407, GNorm = 0.5895, lr_0 = 7.6019e-04
Loss = 3.3572e-02, PNorm = 109.5897, GNorm = 0.2968, lr_0 = 7.5967e-04
Loss = 3.5156e-02, PNorm = 109.6366, GNorm = 0.2260, lr_0 = 7.5915e-04
Loss = 3.4493e-02, PNorm = 109.6959, GNorm = 1.0174, lr_0 = 7.5863e-04
Loss = 4.0722e-02, PNorm = 109.7448, GNorm = 0.5588, lr_0 = 7.5811e-04
Loss = 3.6783e-02, PNorm = 109.8007, GNorm = 0.3093, lr_0 = 7.5759e-04
Loss = 3.3923e-02, PNorm = 109.8647, GNorm = 0.3613, lr_0 = 7.5707e-04
Loss = 4.3162e-02, PNorm = 109.9198, GNorm = 0.2852, lr_0 = 7.5655e-04
Loss = 3.4867e-02, PNorm = 109.9742, GNorm = 0.2568, lr_0 = 7.5603e-04
Loss = 3.7675e-02, PNorm = 110.0254, GNorm = 0.2719, lr_0 = 7.5552e-04
Loss = 3.5924e-02, PNorm = 110.0848, GNorm = 0.6737, lr_0 = 7.5500e-04
Loss = 2.9831e-02, PNorm = 110.1437, GNorm = 0.9554, lr_0 = 7.5448e-04
Loss = 3.8528e-02, PNorm = 110.1975, GNorm = 0.4920, lr_0 = 7.5397e-04
Loss = 3.7970e-02, PNorm = 110.2496, GNorm = 0.4678, lr_0 = 7.5345e-04
Loss = 3.5959e-02, PNorm = 110.3116, GNorm = 0.4008, lr_0 = 7.5293e-04
Loss = 3.6780e-02, PNorm = 110.3713, GNorm = 0.2746, lr_0 = 7.5242e-04
Loss = 3.5170e-02, PNorm = 110.4298, GNorm = 0.3754, lr_0 = 7.5190e-04
Loss = 4.1072e-02, PNorm = 110.4855, GNorm = 0.7944, lr_0 = 7.5139e-04
Loss = 4.2996e-02, PNorm = 110.5455, GNorm = 0.6002, lr_0 = 7.5087e-04
Loss = 3.6786e-02, PNorm = 110.6101, GNorm = 0.9272, lr_0 = 7.5036e-04
Loss = 3.8061e-02, PNorm = 110.6746, GNorm = 0.6326, lr_0 = 7.4984e-04
Loss = 3.4360e-02, PNorm = 110.7392, GNorm = 0.3428, lr_0 = 7.4933e-04
Loss = 3.6451e-02, PNorm = 110.7962, GNorm = 0.3432, lr_0 = 7.4882e-04
Loss = 4.6346e-02, PNorm = 110.8541, GNorm = 0.2748, lr_0 = 7.4830e-04
Loss = 3.6532e-02, PNorm = 110.9180, GNorm = 0.5081, lr_0 = 7.4779e-04
Loss = 3.5943e-02, PNorm = 110.9871, GNorm = 0.7119, lr_0 = 7.4728e-04
Loss = 3.6570e-02, PNorm = 111.0461, GNorm = 0.3363, lr_0 = 7.4677e-04
Loss = 3.6547e-02, PNorm = 111.1127, GNorm = 0.5229, lr_0 = 7.4625e-04
Loss = 3.4707e-02, PNorm = 111.1833, GNorm = 0.2760, lr_0 = 7.4574e-04
Loss = 3.6505e-02, PNorm = 111.2424, GNorm = 0.4481, lr_0 = 7.4523e-04
Loss = 3.2948e-02, PNorm = 111.3055, GNorm = 0.6441, lr_0 = 7.4472e-04
Loss = 3.6571e-02, PNorm = 111.3574, GNorm = 0.3268, lr_0 = 7.4421e-04
Loss = 3.7561e-02, PNorm = 111.4136, GNorm = 0.4274, lr_0 = 7.4370e-04
Loss = 4.2159e-02, PNorm = 111.4694, GNorm = 0.8112, lr_0 = 7.4319e-04
Loss = 4.2162e-02, PNorm = 111.5356, GNorm = 0.3483, lr_0 = 7.4268e-04
Loss = 3.8133e-02, PNorm = 111.6093, GNorm = 0.7934, lr_0 = 7.4217e-04
Loss = 3.6195e-02, PNorm = 111.6796, GNorm = 0.2630, lr_0 = 7.4167e-04
Loss = 3.3108e-02, PNorm = 111.7423, GNorm = 0.3106, lr_0 = 7.4116e-04
Loss = 3.5089e-02, PNorm = 111.8053, GNorm = 0.4041, lr_0 = 7.4065e-04
Loss = 3.7562e-02, PNorm = 111.8611, GNorm = 0.6157, lr_0 = 7.4014e-04
Loss = 3.4072e-02, PNorm = 111.9179, GNorm = 0.5599, lr_0 = 7.3964e-04
Loss = 4.5549e-02, PNorm = 111.9740, GNorm = 0.3909, lr_0 = 7.3913e-04
Loss = 3.7293e-02, PNorm = 112.0395, GNorm = 0.6533, lr_0 = 7.3862e-04
Loss = 3.9830e-02, PNorm = 112.1058, GNorm = 0.3412, lr_0 = 7.3812e-04
Loss = 3.8760e-02, PNorm = 112.1721, GNorm = 0.6368, lr_0 = 7.3761e-04
Loss = 3.5843e-02, PNorm = 112.2340, GNorm = 0.9799, lr_0 = 7.3711e-04
Loss = 4.1429e-02, PNorm = 112.2888, GNorm = 0.4246, lr_0 = 7.3660e-04
Loss = 3.7056e-02, PNorm = 112.3571, GNorm = 0.3053, lr_0 = 7.3610e-04
Loss = 3.9836e-02, PNorm = 112.4289, GNorm = 0.3412, lr_0 = 7.3559e-04
Loss = 4.5674e-02, PNorm = 112.4958, GNorm = 0.3772, lr_0 = 7.3509e-04
Loss = 3.6965e-02, PNorm = 112.5699, GNorm = 0.2737, lr_0 = 7.3458e-04
Loss = 3.7222e-02, PNorm = 112.6417, GNorm = 0.4284, lr_0 = 7.3408e-04
Loss = 3.5716e-02, PNorm = 112.6992, GNorm = 0.4634, lr_0 = 7.3358e-04
Loss = 4.0796e-02, PNorm = 112.7543, GNorm = 0.5081, lr_0 = 7.3308e-04
Loss = 3.4933e-02, PNorm = 112.8048, GNorm = 0.3942, lr_0 = 7.3257e-04
Loss = 4.1344e-02, PNorm = 112.8705, GNorm = 0.4511, lr_0 = 7.3207e-04
Loss = 4.2055e-02, PNorm = 112.9353, GNorm = 0.2916, lr_0 = 7.3157e-04
Loss = 4.7044e-02, PNorm = 113.0070, GNorm = 0.3237, lr_0 = 7.3107e-04
Loss = 3.6698e-02, PNorm = 113.0907, GNorm = 0.4399, lr_0 = 7.3057e-04
Loss = 3.7377e-02, PNorm = 113.1628, GNorm = 0.4024, lr_0 = 7.3007e-04
Loss = 3.7086e-02, PNorm = 113.2333, GNorm = 0.5722, lr_0 = 7.2957e-04
Loss = 3.8484e-02, PNorm = 113.3036, GNorm = 0.5601, lr_0 = 7.2907e-04
Loss = 3.8128e-02, PNorm = 113.3647, GNorm = 0.2956, lr_0 = 7.2857e-04
Loss = 4.1598e-02, PNorm = 113.4264, GNorm = 0.4540, lr_0 = 7.2807e-04
Loss = 4.4565e-02, PNorm = 113.5003, GNorm = 0.3309, lr_0 = 7.2757e-04
Loss = 3.7518e-02, PNorm = 113.5786, GNorm = 0.2962, lr_0 = 7.2707e-04
Loss = 4.1944e-02, PNorm = 113.6563, GNorm = 0.6851, lr_0 = 7.2657e-04
Loss = 3.6791e-02, PNorm = 113.7272, GNorm = 0.7612, lr_0 = 7.2608e-04
Loss = 3.8771e-02, PNorm = 113.7970, GNorm = 0.3897, lr_0 = 7.2558e-04
Loss = 3.9985e-02, PNorm = 113.8653, GNorm = 0.4568, lr_0 = 7.2508e-04
Loss = 3.8023e-02, PNorm = 113.9297, GNorm = 0.3450, lr_0 = 7.2458e-04
Loss = 4.0886e-02, PNorm = 113.9935, GNorm = 0.5365, lr_0 = 7.2409e-04
Loss = 4.0622e-02, PNorm = 114.0576, GNorm = 0.3885, lr_0 = 7.2359e-04
Loss = 4.1218e-02, PNorm = 114.1154, GNorm = 0.3247, lr_0 = 7.2310e-04
Loss = 3.7598e-02, PNorm = 114.1834, GNorm = 0.4821, lr_0 = 7.2260e-04
Loss = 4.8059e-02, PNorm = 114.2653, GNorm = 0.2181, lr_0 = 7.2211e-04
Loss = 4.6311e-02, PNorm = 114.3445, GNorm = 0.2720, lr_0 = 7.2161e-04
Loss = 3.8605e-02, PNorm = 114.4114, GNorm = 0.4539, lr_0 = 7.2112e-04
Loss = 4.7401e-02, PNorm = 114.4861, GNorm = 0.2833, lr_0 = 7.2062e-04
Loss = 4.5527e-02, PNorm = 114.5689, GNorm = 0.6065, lr_0 = 7.2013e-04
Loss = 4.3639e-02, PNorm = 114.6466, GNorm = 0.2648, lr_0 = 7.1964e-04
Validation mae = 0.494097
Epoch 6
Loss = 3.2785e-02, PNorm = 114.7024, GNorm = 0.8526, lr_0 = 7.1914e-04
Loss = 3.1805e-02, PNorm = 114.7520, GNorm = 0.6336, lr_0 = 7.1865e-04
Loss = 3.5652e-02, PNorm = 114.7987, GNorm = 0.3634, lr_0 = 7.1816e-04
Loss = 3.5186e-02, PNorm = 114.8505, GNorm = 0.5367, lr_0 = 7.1767e-04
Loss = 3.3272e-02, PNorm = 114.8995, GNorm = 0.5151, lr_0 = 7.1717e-04
Loss = 3.0317e-02, PNorm = 114.9559, GNorm = 0.4586, lr_0 = 7.1668e-04
Loss = 2.7158e-02, PNorm = 114.9990, GNorm = 0.6721, lr_0 = 7.1619e-04
Loss = 3.0621e-02, PNorm = 115.0422, GNorm = 0.6193, lr_0 = 7.1570e-04
Loss = 2.9853e-02, PNorm = 115.0911, GNorm = 0.6910, lr_0 = 7.1521e-04
Loss = 2.9795e-02, PNorm = 115.1398, GNorm = 0.5771, lr_0 = 7.1472e-04
Loss = 3.1897e-02, PNorm = 115.1897, GNorm = 0.4580, lr_0 = 7.1423e-04
Loss = 2.4049e-02, PNorm = 115.2323, GNorm = 0.4629, lr_0 = 7.1374e-04
Loss = 3.2661e-02, PNorm = 115.2668, GNorm = 0.5391, lr_0 = 7.1325e-04
Loss = 2.8411e-02, PNorm = 115.3102, GNorm = 0.2104, lr_0 = 7.1277e-04
Loss = 2.6953e-02, PNorm = 115.3601, GNorm = 0.2655, lr_0 = 7.1228e-04
Loss = 2.7859e-02, PNorm = 115.4057, GNorm = 0.4643, lr_0 = 7.1179e-04
Loss = 2.9177e-02, PNorm = 115.4542, GNorm = 0.3765, lr_0 = 7.1130e-04
Loss = 2.4300e-02, PNorm = 115.5010, GNorm = 0.3253, lr_0 = 7.1081e-04
Loss = 2.9107e-02, PNorm = 115.5394, GNorm = 0.6002, lr_0 = 7.1033e-04
Loss = 2.6494e-02, PNorm = 115.5846, GNorm = 0.4639, lr_0 = 7.0984e-04
Loss = 2.4275e-02, PNorm = 115.6244, GNorm = 0.2323, lr_0 = 7.0935e-04
Loss = 2.9408e-02, PNorm = 115.6699, GNorm = 0.2215, lr_0 = 7.0887e-04
Loss = 2.5065e-02, PNorm = 115.7119, GNorm = 0.2843, lr_0 = 7.0838e-04
Loss = 2.6410e-02, PNorm = 115.7579, GNorm = 1.1343, lr_0 = 7.0790e-04
Loss = 2.7184e-02, PNorm = 115.7928, GNorm = 0.6534, lr_0 = 7.0741e-04
Loss = 2.8414e-02, PNorm = 115.8337, GNorm = 0.3414, lr_0 = 7.0693e-04
Loss = 2.3923e-02, PNorm = 115.8768, GNorm = 0.1756, lr_0 = 7.0644e-04
Loss = 3.0082e-02, PNorm = 115.9238, GNorm = 0.6146, lr_0 = 7.0596e-04
Loss = 2.5322e-02, PNorm = 115.9743, GNorm = 0.4192, lr_0 = 7.0548e-04
Loss = 2.9029e-02, PNorm = 116.0204, GNorm = 0.2762, lr_0 = 7.0499e-04
Loss = 2.5219e-02, PNorm = 116.0652, GNorm = 0.3466, lr_0 = 7.0451e-04
Loss = 2.7857e-02, PNorm = 116.1025, GNorm = 0.2663, lr_0 = 7.0403e-04
Loss = 2.4128e-02, PNorm = 116.1440, GNorm = 0.6428, lr_0 = 7.0354e-04
Loss = 2.8842e-02, PNorm = 116.1910, GNorm = 0.2446, lr_0 = 7.0306e-04
Loss = 2.9879e-02, PNorm = 116.2446, GNorm = 0.2115, lr_0 = 7.0258e-04
Loss = 2.7651e-02, PNorm = 116.2976, GNorm = 0.6707, lr_0 = 7.0210e-04
Loss = 2.3489e-02, PNorm = 116.3439, GNorm = 0.3609, lr_0 = 7.0162e-04
Loss = 2.6616e-02, PNorm = 116.3847, GNorm = 0.2651, lr_0 = 7.0114e-04
Loss = 2.5989e-02, PNorm = 116.4233, GNorm = 0.6617, lr_0 = 7.0066e-04
Loss = 2.7618e-02, PNorm = 116.4692, GNorm = 0.4593, lr_0 = 7.0018e-04
Loss = 2.6122e-02, PNorm = 116.5171, GNorm = 0.4299, lr_0 = 6.9970e-04
Loss = 2.7986e-02, PNorm = 116.5629, GNorm = 0.3447, lr_0 = 6.9922e-04
Loss = 2.6960e-02, PNorm = 116.6118, GNorm = 0.3148, lr_0 = 6.9874e-04
Loss = 2.6466e-02, PNorm = 116.6548, GNorm = 0.3305, lr_0 = 6.9826e-04
Loss = 2.5413e-02, PNorm = 116.7011, GNorm = 0.4951, lr_0 = 6.9778e-04
Loss = 3.0295e-02, PNorm = 116.7507, GNorm = 0.4452, lr_0 = 6.9730e-04
Loss = 2.8788e-02, PNorm = 116.8070, GNorm = 0.4811, lr_0 = 6.9683e-04
Loss = 2.8673e-02, PNorm = 116.8623, GNorm = 0.5237, lr_0 = 6.9635e-04
Loss = 3.1210e-02, PNorm = 116.9190, GNorm = 0.7807, lr_0 = 6.9587e-04
Loss = 2.6961e-02, PNorm = 116.9681, GNorm = 0.3400, lr_0 = 6.9540e-04
Loss = 2.4856e-02, PNorm = 117.0195, GNorm = 0.4491, lr_0 = 6.9492e-04
Loss = 2.6455e-02, PNorm = 117.0678, GNorm = 0.4203, lr_0 = 6.9444e-04
Loss = 2.4853e-02, PNorm = 117.1139, GNorm = 0.3325, lr_0 = 6.9397e-04
Loss = 3.0238e-02, PNorm = 117.1651, GNorm = 0.3882, lr_0 = 6.9349e-04
Loss = 2.7363e-02, PNorm = 117.2139, GNorm = 0.2417, lr_0 = 6.9302e-04
Loss = 2.5928e-02, PNorm = 117.2637, GNorm = 0.2992, lr_0 = 6.9254e-04
Loss = 2.5266e-02, PNorm = 117.3110, GNorm = 0.4638, lr_0 = 6.9207e-04
Loss = 2.7820e-02, PNorm = 117.3629, GNorm = 0.3954, lr_0 = 6.9159e-04
Loss = 3.3352e-02, PNorm = 117.4085, GNorm = 0.4611, lr_0 = 6.9112e-04
Loss = 2.8732e-02, PNorm = 117.4663, GNorm = 0.2291, lr_0 = 6.9065e-04
Loss = 3.0923e-02, PNorm = 117.5237, GNorm = 0.5170, lr_0 = 6.9017e-04
Loss = 2.6403e-02, PNorm = 117.5755, GNorm = 0.4736, lr_0 = 6.8970e-04
Loss = 2.8130e-02, PNorm = 117.6316, GNorm = 0.3078, lr_0 = 6.8923e-04
Loss = 2.8223e-02, PNorm = 117.6845, GNorm = 0.5808, lr_0 = 6.8876e-04
Loss = 2.9909e-02, PNorm = 117.7412, GNorm = 0.2998, lr_0 = 6.8828e-04
Loss = 2.8393e-02, PNorm = 117.7990, GNorm = 0.3670, lr_0 = 6.8781e-04
Loss = 3.0156e-02, PNorm = 117.8543, GNorm = 0.7958, lr_0 = 6.8734e-04
Loss = 3.5003e-02, PNorm = 117.9065, GNorm = 0.4579, lr_0 = 6.8687e-04
Loss = 2.8631e-02, PNorm = 117.9573, GNorm = 0.4826, lr_0 = 6.8640e-04
Loss = 2.7576e-02, PNorm = 118.0069, GNorm = 0.2237, lr_0 = 6.8593e-04
Loss = 3.0790e-02, PNorm = 118.0627, GNorm = 1.1398, lr_0 = 6.8546e-04
Loss = 3.3513e-02, PNorm = 118.1188, GNorm = 0.2687, lr_0 = 6.8499e-04
Loss = 2.9486e-02, PNorm = 118.1720, GNorm = 0.2745, lr_0 = 6.8452e-04
Loss = 3.2495e-02, PNorm = 118.2293, GNorm = 0.3330, lr_0 = 6.8405e-04
Loss = 3.2329e-02, PNorm = 118.2883, GNorm = 0.3490, lr_0 = 6.8358e-04
Loss = 2.7815e-02, PNorm = 118.3403, GNorm = 0.2792, lr_0 = 6.8312e-04
Loss = 2.8084e-02, PNorm = 118.3990, GNorm = 0.6019, lr_0 = 6.8265e-04
Loss = 2.9503e-02, PNorm = 118.4612, GNorm = 0.6662, lr_0 = 6.8218e-04
Loss = 2.4248e-02, PNorm = 118.5160, GNorm = 0.3980, lr_0 = 6.8171e-04
Loss = 2.9635e-02, PNorm = 118.5683, GNorm = 0.3468, lr_0 = 6.8125e-04
Loss = 2.4071e-02, PNorm = 118.6164, GNorm = 0.5025, lr_0 = 6.8078e-04
Loss = 3.0444e-02, PNorm = 118.6724, GNorm = 0.6122, lr_0 = 6.8031e-04
Loss = 2.8115e-02, PNorm = 118.7248, GNorm = 0.2425, lr_0 = 6.7985e-04
Loss = 2.8383e-02, PNorm = 118.7776, GNorm = 0.2828, lr_0 = 6.7938e-04
Loss = 2.9801e-02, PNorm = 118.8341, GNorm = 0.2920, lr_0 = 6.7892e-04
Loss = 3.0967e-02, PNorm = 118.8903, GNorm = 0.5110, lr_0 = 6.7845e-04
Loss = 2.6599e-02, PNorm = 118.9467, GNorm = 0.2877, lr_0 = 6.7799e-04
Loss = 2.5041e-02, PNorm = 118.9947, GNorm = 0.2783, lr_0 = 6.7752e-04
Loss = 2.8479e-02, PNorm = 119.0470, GNorm = 0.6195, lr_0 = 6.7706e-04
Loss = 2.8416e-02, PNorm = 119.1033, GNorm = 0.4339, lr_0 = 6.7659e-04
Loss = 3.1968e-02, PNorm = 119.1504, GNorm = 0.3265, lr_0 = 6.7613e-04
Loss = 3.1436e-02, PNorm = 119.2067, GNorm = 0.3408, lr_0 = 6.7567e-04
Loss = 2.9451e-02, PNorm = 119.2645, GNorm = 0.4287, lr_0 = 6.7520e-04
Loss = 2.9513e-02, PNorm = 119.3184, GNorm = 0.6099, lr_0 = 6.7474e-04
Loss = 2.9381e-02, PNorm = 119.3701, GNorm = 0.3226, lr_0 = 6.7428e-04
Loss = 3.1482e-02, PNorm = 119.4238, GNorm = 0.2860, lr_0 = 6.7382e-04
Loss = 3.2755e-02, PNorm = 119.4767, GNorm = 0.6387, lr_0 = 6.7335e-04
Loss = 2.5740e-02, PNorm = 119.5376, GNorm = 0.3099, lr_0 = 6.7289e-04
Loss = 3.1444e-02, PNorm = 119.5838, GNorm = 0.2603, lr_0 = 6.7243e-04
Loss = 2.7199e-02, PNorm = 119.6421, GNorm = 0.3700, lr_0 = 6.7197e-04
Loss = 3.1480e-02, PNorm = 119.7063, GNorm = 0.4152, lr_0 = 6.7151e-04
Loss = 3.8223e-02, PNorm = 119.7677, GNorm = 0.5227, lr_0 = 6.7105e-04
Loss = 3.4739e-02, PNorm = 119.8217, GNorm = 0.4359, lr_0 = 6.7059e-04
Loss = 2.9624e-02, PNorm = 119.8808, GNorm = 0.7163, lr_0 = 6.7013e-04
Loss = 3.1198e-02, PNorm = 119.9408, GNorm = 0.2574, lr_0 = 6.6967e-04
Loss = 3.2216e-02, PNorm = 120.0042, GNorm = 0.7040, lr_0 = 6.6921e-04
Loss = 2.8669e-02, PNorm = 120.0584, GNorm = 0.4063, lr_0 = 6.6876e-04
Loss = 3.2588e-02, PNorm = 120.1085, GNorm = 0.8239, lr_0 = 6.6830e-04
Loss = 3.3232e-02, PNorm = 120.1620, GNorm = 0.4713, lr_0 = 6.6784e-04
Loss = 3.3107e-02, PNorm = 120.2209, GNorm = 0.4709, lr_0 = 6.6738e-04
Loss = 3.1985e-02, PNorm = 120.2818, GNorm = 0.6495, lr_0 = 6.6693e-04
Loss = 3.3851e-02, PNorm = 120.3441, GNorm = 0.2059, lr_0 = 6.6647e-04
Loss = 2.8362e-02, PNorm = 120.3990, GNorm = 0.2239, lr_0 = 6.6601e-04
Loss = 2.3358e-02, PNorm = 120.4525, GNorm = 0.3095, lr_0 = 6.6556e-04
Loss = 3.3220e-02, PNorm = 120.5074, GNorm = 0.2762, lr_0 = 6.6510e-04
Loss = 3.1008e-02, PNorm = 120.5640, GNorm = 0.7212, lr_0 = 6.6464e-04
Loss = 2.9170e-02, PNorm = 120.6224, GNorm = 0.8223, lr_0 = 6.6419e-04
Loss = 3.8649e-02, PNorm = 120.6865, GNorm = 0.7739, lr_0 = 6.6373e-04
Loss = 2.8810e-02, PNorm = 120.7485, GNorm = 0.4758, lr_0 = 6.6328e-04
Loss = 2.7639e-02, PNorm = 120.8103, GNorm = 0.4270, lr_0 = 6.6282e-04
Validation mae = 0.495150
Epoch 7
Loss = 2.4709e-02, PNorm = 120.8525, GNorm = 0.2026, lr_0 = 6.6237e-04
Loss = 2.4150e-02, PNorm = 120.8910, GNorm = 0.2613, lr_0 = 6.6192e-04
Loss = 2.6299e-02, PNorm = 120.9237, GNorm = 1.0021, lr_0 = 6.6146e-04
Loss = 2.2749e-02, PNorm = 120.9709, GNorm = 0.1592, lr_0 = 6.6101e-04
Loss = 2.4699e-02, PNorm = 121.0113, GNorm = 0.5133, lr_0 = 6.6056e-04
Loss = 2.3448e-02, PNorm = 121.0497, GNorm = 0.3313, lr_0 = 6.6011e-04
Loss = 2.3213e-02, PNorm = 121.0883, GNorm = 0.2668, lr_0 = 6.5965e-04
Loss = 2.6239e-02, PNorm = 121.1284, GNorm = 0.2833, lr_0 = 6.5920e-04
Loss = 2.3805e-02, PNorm = 121.1755, GNorm = 0.2464, lr_0 = 6.5875e-04
Loss = 2.2510e-02, PNorm = 121.2124, GNorm = 0.5472, lr_0 = 6.5830e-04
Loss = 2.2667e-02, PNorm = 121.2540, GNorm = 0.3428, lr_0 = 6.5785e-04
Loss = 2.6573e-02, PNorm = 121.2916, GNorm = 0.4708, lr_0 = 6.5740e-04
Loss = 2.5302e-02, PNorm = 121.3405, GNorm = 0.6963, lr_0 = 6.5695e-04
Loss = 2.3462e-02, PNorm = 121.3868, GNorm = 0.9994, lr_0 = 6.5650e-04
Loss = 2.3128e-02, PNorm = 121.4278, GNorm = 0.2698, lr_0 = 6.5605e-04
Loss = 2.5973e-02, PNorm = 121.4710, GNorm = 0.2703, lr_0 = 6.5560e-04
Loss = 2.4653e-02, PNorm = 121.5124, GNorm = 0.5735, lr_0 = 6.5515e-04
Loss = 2.3223e-02, PNorm = 121.5523, GNorm = 0.3978, lr_0 = 6.5470e-04
Loss = 2.2897e-02, PNorm = 121.5956, GNorm = 0.4592, lr_0 = 6.5425e-04
Loss = 2.1783e-02, PNorm = 121.6370, GNorm = 0.3811, lr_0 = 6.5380e-04
Loss = 2.2745e-02, PNorm = 121.6706, GNorm = 0.5864, lr_0 = 6.5335e-04
Loss = 1.9951e-02, PNorm = 121.7148, GNorm = 0.8279, lr_0 = 6.5291e-04
Loss = 2.0972e-02, PNorm = 121.7547, GNorm = 0.1988, lr_0 = 6.5246e-04
Loss = 2.0221e-02, PNorm = 121.7923, GNorm = 0.4849, lr_0 = 6.5201e-04
Loss = 2.5623e-02, PNorm = 121.8292, GNorm = 0.5595, lr_0 = 6.5157e-04
Loss = 1.9810e-02, PNorm = 121.8706, GNorm = 0.1299, lr_0 = 6.5112e-04
Loss = 2.1569e-02, PNorm = 121.9108, GNorm = 0.3272, lr_0 = 6.5067e-04
Loss = 1.8769e-02, PNorm = 121.9432, GNorm = 0.2348, lr_0 = 6.5023e-04
Loss = 2.0553e-02, PNorm = 121.9805, GNorm = 0.2166, lr_0 = 6.4978e-04
Loss = 1.9769e-02, PNorm = 122.0179, GNorm = 0.7175, lr_0 = 6.4934e-04
Loss = 2.3208e-02, PNorm = 122.0523, GNorm = 0.9774, lr_0 = 6.4889e-04
Loss = 2.2243e-02, PNorm = 122.0938, GNorm = 0.2577, lr_0 = 6.4845e-04
Loss = 2.4371e-02, PNorm = 122.1375, GNorm = 0.3431, lr_0 = 6.4800e-04
Loss = 2.1192e-02, PNorm = 122.1786, GNorm = 0.2994, lr_0 = 6.4756e-04
Loss = 2.2397e-02, PNorm = 122.2192, GNorm = 0.6088, lr_0 = 6.4712e-04
Loss = 2.0869e-02, PNorm = 122.2589, GNorm = 0.2269, lr_0 = 6.4667e-04
Loss = 2.0480e-02, PNorm = 122.2958, GNorm = 0.1819, lr_0 = 6.4623e-04
Loss = 2.2929e-02, PNorm = 122.3336, GNorm = 0.1911, lr_0 = 6.4579e-04
Loss = 2.2571e-02, PNorm = 122.3811, GNorm = 0.2251, lr_0 = 6.4534e-04
Loss = 2.2284e-02, PNorm = 122.4268, GNorm = 0.2206, lr_0 = 6.4490e-04
Loss = 2.0543e-02, PNorm = 122.4660, GNorm = 0.2883, lr_0 = 6.4446e-04
Loss = 2.2658e-02, PNorm = 122.5078, GNorm = 0.3423, lr_0 = 6.4402e-04
Loss = 2.4362e-02, PNorm = 122.5518, GNorm = 0.3673, lr_0 = 6.4358e-04
Loss = 2.3342e-02, PNorm = 122.6005, GNorm = 0.7157, lr_0 = 6.4314e-04
Loss = 2.2767e-02, PNorm = 122.6392, GNorm = 0.4383, lr_0 = 6.4270e-04
Loss = 2.0562e-02, PNorm = 122.6836, GNorm = 0.2557, lr_0 = 6.4226e-04
Loss = 1.9882e-02, PNorm = 122.7292, GNorm = 0.3852, lr_0 = 6.4182e-04
Loss = 2.1840e-02, PNorm = 122.7679, GNorm = 0.4595, lr_0 = 6.4138e-04
Loss = 2.1672e-02, PNorm = 122.8103, GNorm = 0.2239, lr_0 = 6.4094e-04
Loss = 1.9334e-02, PNorm = 122.8530, GNorm = 0.4334, lr_0 = 6.4050e-04
Loss = 1.9187e-02, PNorm = 122.8945, GNorm = 0.1878, lr_0 = 6.4006e-04
Loss = 2.3073e-02, PNorm = 122.9393, GNorm = 0.3550, lr_0 = 6.3962e-04
Loss = 2.2381e-02, PNorm = 122.9875, GNorm = 0.1758, lr_0 = 6.3918e-04
Loss = 1.7785e-02, PNorm = 123.0340, GNorm = 0.3362, lr_0 = 6.3874e-04
Loss = 2.0228e-02, PNorm = 123.0730, GNorm = 0.7728, lr_0 = 6.3831e-04
Loss = 1.8897e-02, PNorm = 123.1152, GNorm = 0.1454, lr_0 = 6.3787e-04
Loss = 2.5217e-02, PNorm = 123.1581, GNorm = 0.2316, lr_0 = 6.3743e-04
Loss = 2.3865e-02, PNorm = 123.1985, GNorm = 0.2480, lr_0 = 6.3700e-04
Loss = 2.1515e-02, PNorm = 123.2489, GNorm = 0.4339, lr_0 = 6.3656e-04
Loss = 2.1536e-02, PNorm = 123.2983, GNorm = 0.4123, lr_0 = 6.3612e-04
Loss = 2.0863e-02, PNorm = 123.3413, GNorm = 0.2444, lr_0 = 6.3569e-04
Loss = 3.0452e-02, PNorm = 123.3860, GNorm = 0.5934, lr_0 = 6.3525e-04
Loss = 2.4336e-02, PNorm = 123.4409, GNorm = 0.3693, lr_0 = 6.3482e-04
Loss = 2.6642e-02, PNorm = 123.4918, GNorm = 0.7777, lr_0 = 6.3438e-04
Loss = 2.8565e-02, PNorm = 123.5397, GNorm = 0.8012, lr_0 = 6.3395e-04
Loss = 2.1627e-02, PNorm = 123.5807, GNorm = 0.2947, lr_0 = 6.3351e-04
Loss = 2.8004e-02, PNorm = 123.6290, GNorm = 0.4961, lr_0 = 6.3308e-04
Loss = 2.3004e-02, PNorm = 123.6687, GNorm = 0.5200, lr_0 = 6.3265e-04
Loss = 2.2838e-02, PNorm = 123.7164, GNorm = 0.2773, lr_0 = 6.3221e-04
Loss = 2.3688e-02, PNorm = 123.7627, GNorm = 0.3659, lr_0 = 6.3178e-04
Loss = 2.2114e-02, PNorm = 123.8062, GNorm = 0.4342, lr_0 = 6.3135e-04
Loss = 2.1027e-02, PNorm = 123.8541, GNorm = 0.3583, lr_0 = 6.3091e-04
Loss = 2.1941e-02, PNorm = 123.8995, GNorm = 0.1879, lr_0 = 6.3048e-04
Loss = 2.0957e-02, PNorm = 123.9433, GNorm = 0.3212, lr_0 = 6.3005e-04
Loss = 2.5807e-02, PNorm = 123.9904, GNorm = 0.3031, lr_0 = 6.2962e-04
Loss = 2.4061e-02, PNorm = 124.0404, GNorm = 0.5656, lr_0 = 6.2919e-04
Loss = 2.1609e-02, PNorm = 124.0916, GNorm = 0.1911, lr_0 = 6.2876e-04
Loss = 2.5720e-02, PNorm = 124.1384, GNorm = 0.4731, lr_0 = 6.2833e-04
Loss = 1.9926e-02, PNorm = 124.1925, GNorm = 0.2159, lr_0 = 6.2789e-04
Loss = 2.1590e-02, PNorm = 124.2362, GNorm = 0.3679, lr_0 = 6.2746e-04
Loss = 2.5049e-02, PNorm = 124.2837, GNorm = 0.3142, lr_0 = 6.2703e-04
Loss = 2.2773e-02, PNorm = 124.3337, GNorm = 0.4256, lr_0 = 6.2661e-04
Loss = 2.2978e-02, PNorm = 124.3798, GNorm = 0.3039, lr_0 = 6.2618e-04
Loss = 2.1903e-02, PNorm = 124.4276, GNorm = 0.1421, lr_0 = 6.2575e-04
Loss = 2.4783e-02, PNorm = 124.4818, GNorm = 0.5077, lr_0 = 6.2532e-04
Loss = 2.1676e-02, PNorm = 124.5299, GNorm = 0.4453, lr_0 = 6.2489e-04
Loss = 2.1336e-02, PNorm = 124.5764, GNorm = 0.4544, lr_0 = 6.2446e-04
Loss = 2.3346e-02, PNorm = 124.6267, GNorm = 0.5579, lr_0 = 6.2403e-04
Loss = 2.3678e-02, PNorm = 124.6779, GNorm = 0.3102, lr_0 = 6.2361e-04
Loss = 2.0911e-02, PNorm = 124.7231, GNorm = 0.2503, lr_0 = 6.2318e-04
Loss = 2.2773e-02, PNorm = 124.7716, GNorm = 0.2772, lr_0 = 6.2275e-04
Loss = 2.2185e-02, PNorm = 124.8197, GNorm = 0.5684, lr_0 = 6.2233e-04
Loss = 1.7687e-02, PNorm = 124.8630, GNorm = 0.3369, lr_0 = 6.2190e-04
Loss = 2.1517e-02, PNorm = 124.9096, GNorm = 0.1969, lr_0 = 6.2147e-04
Loss = 2.3508e-02, PNorm = 124.9632, GNorm = 0.2291, lr_0 = 6.2105e-04
Loss = 2.4678e-02, PNorm = 125.0075, GNorm = 0.2161, lr_0 = 6.2062e-04
Loss = 2.7623e-02, PNorm = 125.0617, GNorm = 0.6393, lr_0 = 6.2020e-04
Loss = 2.6254e-02, PNorm = 125.1171, GNorm = 0.1996, lr_0 = 6.1977e-04
Loss = 2.4362e-02, PNorm = 125.1736, GNorm = 0.4212, lr_0 = 6.1935e-04
Loss = 2.3939e-02, PNorm = 125.2286, GNorm = 0.6614, lr_0 = 6.1892e-04
Loss = 2.3312e-02, PNorm = 125.2817, GNorm = 0.2044, lr_0 = 6.1850e-04
Loss = 2.5217e-02, PNorm = 125.3350, GNorm = 0.6032, lr_0 = 6.1808e-04
Loss = 2.4383e-02, PNorm = 125.3929, GNorm = 0.4099, lr_0 = 6.1765e-04
Loss = 2.5134e-02, PNorm = 125.4511, GNorm = 0.2865, lr_0 = 6.1723e-04
Loss = 2.2848e-02, PNorm = 125.5060, GNorm = 0.3187, lr_0 = 6.1681e-04
Loss = 2.1047e-02, PNorm = 125.5522, GNorm = 0.1988, lr_0 = 6.1638e-04
Loss = 2.5631e-02, PNorm = 125.5900, GNorm = 0.4486, lr_0 = 6.1596e-04
Loss = 2.3610e-02, PNorm = 125.6447, GNorm = 0.3762, lr_0 = 6.1554e-04
Loss = 2.4414e-02, PNorm = 125.6992, GNorm = 0.3336, lr_0 = 6.1512e-04
Loss = 2.2100e-02, PNorm = 125.7521, GNorm = 0.2666, lr_0 = 6.1470e-04
Loss = 2.4109e-02, PNorm = 125.8019, GNorm = 0.3069, lr_0 = 6.1428e-04
Loss = 2.1768e-02, PNorm = 125.8557, GNorm = 0.4132, lr_0 = 6.1385e-04
Loss = 1.9529e-02, PNorm = 125.9024, GNorm = 0.2580, lr_0 = 6.1343e-04
Loss = 2.3190e-02, PNorm = 125.9517, GNorm = 0.1959, lr_0 = 6.1301e-04
Loss = 2.3675e-02, PNorm = 126.0013, GNorm = 0.8349, lr_0 = 6.1259e-04
Loss = 2.2215e-02, PNorm = 126.0514, GNorm = 0.3843, lr_0 = 6.1217e-04
Loss = 2.5318e-02, PNorm = 126.0999, GNorm = 0.3618, lr_0 = 6.1175e-04
Loss = 2.1415e-02, PNorm = 126.1547, GNorm = 0.4080, lr_0 = 6.1134e-04
Loss = 2.6637e-02, PNorm = 126.2044, GNorm = 0.2291, lr_0 = 6.1092e-04
Loss = 2.5731e-02, PNorm = 126.2571, GNorm = 0.8322, lr_0 = 6.1050e-04
Validation mae = 0.488837
Epoch 8
Loss = 1.7476e-02, PNorm = 126.3085, GNorm = 0.3592, lr_0 = 6.1008e-04
Loss = 2.0479e-02, PNorm = 126.3467, GNorm = 0.4002, lr_0 = 6.0966e-04
Loss = 2.3637e-02, PNorm = 126.3877, GNorm = 0.6341, lr_0 = 6.0924e-04
Loss = 2.0288e-02, PNorm = 126.4202, GNorm = 0.4550, lr_0 = 6.0883e-04
Loss = 2.0371e-02, PNorm = 126.4505, GNorm = 0.2547, lr_0 = 6.0841e-04
Loss = 1.8652e-02, PNorm = 126.4833, GNorm = 0.3727, lr_0 = 6.0799e-04
Loss = 1.7714e-02, PNorm = 126.5150, GNorm = 0.4469, lr_0 = 6.0758e-04
Loss = 1.8261e-02, PNorm = 126.5446, GNorm = 0.1948, lr_0 = 6.0716e-04
Loss = 1.7416e-02, PNorm = 126.5726, GNorm = 0.4382, lr_0 = 6.0674e-04
Loss = 2.0306e-02, PNorm = 126.6036, GNorm = 0.2181, lr_0 = 6.0633e-04
Loss = 1.9024e-02, PNorm = 126.6394, GNorm = 0.4561, lr_0 = 6.0591e-04
Loss = 2.1893e-02, PNorm = 126.6684, GNorm = 0.2460, lr_0 = 6.0550e-04
Loss = 1.8117e-02, PNorm = 126.7044, GNorm = 0.2099, lr_0 = 6.0508e-04
Loss = 1.7841e-02, PNorm = 126.7389, GNorm = 0.1378, lr_0 = 6.0467e-04
Loss = 2.0354e-02, PNorm = 126.7750, GNorm = 0.2164, lr_0 = 6.0425e-04
Loss = 1.7946e-02, PNorm = 126.8047, GNorm = 0.2070, lr_0 = 6.0384e-04
Loss = 1.8695e-02, PNorm = 126.8333, GNorm = 0.2478, lr_0 = 6.0343e-04
Loss = 1.9448e-02, PNorm = 126.8646, GNorm = 0.3034, lr_0 = 6.0301e-04
Loss = 1.6904e-02, PNorm = 126.8969, GNorm = 0.1907, lr_0 = 6.0260e-04
Loss = 1.7966e-02, PNorm = 126.9282, GNorm = 0.1592, lr_0 = 6.0219e-04
Loss = 1.6850e-02, PNorm = 126.9568, GNorm = 0.2128, lr_0 = 6.0178e-04
Loss = 2.0346e-02, PNorm = 126.9965, GNorm = 0.4966, lr_0 = 6.0136e-04
Loss = 1.8007e-02, PNorm = 127.0385, GNorm = 0.4222, lr_0 = 6.0095e-04
Loss = 1.5938e-02, PNorm = 127.0728, GNorm = 0.2840, lr_0 = 6.0054e-04
Loss = 1.9635e-02, PNorm = 127.1042, GNorm = 0.3536, lr_0 = 6.0013e-04
Loss = 2.1542e-02, PNorm = 127.1430, GNorm = 0.5104, lr_0 = 5.9972e-04
Loss = 2.1018e-02, PNorm = 127.1848, GNorm = 0.4641, lr_0 = 5.9931e-04
Loss = 2.6326e-02, PNorm = 127.2250, GNorm = 0.1980, lr_0 = 5.9890e-04
Loss = 1.6928e-02, PNorm = 127.2622, GNorm = 0.5079, lr_0 = 5.9849e-04
Loss = 1.7830e-02, PNorm = 127.3015, GNorm = 0.2549, lr_0 = 5.9808e-04
Loss = 1.8038e-02, PNorm = 127.3348, GNorm = 0.9216, lr_0 = 5.9767e-04
Loss = 1.8098e-02, PNorm = 127.3707, GNorm = 0.3452, lr_0 = 5.9726e-04
Loss = 2.2043e-02, PNorm = 127.4058, GNorm = 1.0205, lr_0 = 5.9685e-04
Loss = 1.7414e-02, PNorm = 127.4377, GNorm = 0.2965, lr_0 = 5.9644e-04
Loss = 1.8359e-02, PNorm = 127.4690, GNorm = 0.4165, lr_0 = 5.9603e-04
Loss = 1.5207e-02, PNorm = 127.5013, GNorm = 0.2000, lr_0 = 5.9562e-04
Loss = 1.7047e-02, PNorm = 127.5309, GNorm = 0.6297, lr_0 = 5.9521e-04
Loss = 1.7197e-02, PNorm = 127.5663, GNorm = 0.1664, lr_0 = 5.9481e-04
Loss = 1.9698e-02, PNorm = 127.6033, GNorm = 0.2492, lr_0 = 5.9440e-04
Loss = 2.0212e-02, PNorm = 127.6368, GNorm = 0.9404, lr_0 = 5.9399e-04
Loss = 1.8040e-02, PNorm = 127.6723, GNorm = 0.3215, lr_0 = 5.9358e-04
Loss = 1.6542e-02, PNorm = 127.7096, GNorm = 0.7793, lr_0 = 5.9318e-04
Loss = 1.9403e-02, PNorm = 127.7516, GNorm = 0.4865, lr_0 = 5.9277e-04
Loss = 1.9631e-02, PNorm = 127.7876, GNorm = 0.1431, lr_0 = 5.9236e-04
Loss = 1.9553e-02, PNorm = 127.8278, GNorm = 0.3976, lr_0 = 5.9196e-04
Loss = 2.0163e-02, PNorm = 127.8644, GNorm = 0.4722, lr_0 = 5.9155e-04
Loss = 1.8707e-02, PNorm = 127.9085, GNorm = 0.2647, lr_0 = 5.9115e-04
Loss = 1.6824e-02, PNorm = 127.9487, GNorm = 0.4645, lr_0 = 5.9074e-04
Loss = 2.0804e-02, PNorm = 127.9799, GNorm = 0.2802, lr_0 = 5.9034e-04
Loss = 1.9415e-02, PNorm = 128.0126, GNorm = 0.4186, lr_0 = 5.8993e-04
Loss = 1.7712e-02, PNorm = 128.0468, GNorm = 0.2661, lr_0 = 5.8953e-04
Loss = 1.9789e-02, PNorm = 128.0806, GNorm = 0.2309, lr_0 = 5.8913e-04
Loss = 2.2159e-02, PNorm = 128.1178, GNorm = 0.7681, lr_0 = 5.8872e-04
Loss = 2.0160e-02, PNorm = 128.1567, GNorm = 0.3383, lr_0 = 5.8832e-04
Loss = 2.1900e-02, PNorm = 128.1972, GNorm = 0.3850, lr_0 = 5.8792e-04
Loss = 1.5739e-02, PNorm = 128.2432, GNorm = 0.1991, lr_0 = 5.8751e-04
Loss = 1.7287e-02, PNorm = 128.2837, GNorm = 0.4302, lr_0 = 5.8711e-04
Loss = 1.5821e-02, PNorm = 128.3169, GNorm = 0.4260, lr_0 = 5.8671e-04
Loss = 1.7950e-02, PNorm = 128.3529, GNorm = 0.7094, lr_0 = 5.8631e-04
Loss = 1.9134e-02, PNorm = 128.3925, GNorm = 0.5700, lr_0 = 5.8591e-04
Loss = 1.8724e-02, PNorm = 128.4319, GNorm = 0.1861, lr_0 = 5.8550e-04
Loss = 1.9163e-02, PNorm = 128.4778, GNorm = 0.3565, lr_0 = 5.8510e-04
Loss = 1.9937e-02, PNorm = 128.5240, GNorm = 0.7177, lr_0 = 5.8470e-04
Loss = 1.9006e-02, PNorm = 128.5685, GNorm = 0.7264, lr_0 = 5.8430e-04
Loss = 1.5802e-02, PNorm = 128.6068, GNorm = 0.1432, lr_0 = 5.8390e-04
Loss = 1.9762e-02, PNorm = 128.6454, GNorm = 0.2067, lr_0 = 5.8350e-04
Loss = 1.5696e-02, PNorm = 128.6802, GNorm = 0.2247, lr_0 = 5.8310e-04
Loss = 1.5715e-02, PNorm = 128.7138, GNorm = 0.3183, lr_0 = 5.8270e-04
Loss = 2.1314e-02, PNorm = 128.7519, GNorm = 0.1427, lr_0 = 5.8230e-04
Loss = 1.6635e-02, PNorm = 128.7866, GNorm = 0.2794, lr_0 = 5.8190e-04
Loss = 1.7760e-02, PNorm = 128.8253, GNorm = 0.5103, lr_0 = 5.8151e-04
Loss = 2.1458e-02, PNorm = 128.8636, GNorm = 0.3397, lr_0 = 5.8111e-04
Loss = 1.7048e-02, PNorm = 128.9059, GNorm = 0.1997, lr_0 = 5.8071e-04
Loss = 1.7492e-02, PNorm = 128.9435, GNorm = 0.1608, lr_0 = 5.8031e-04
Loss = 1.9238e-02, PNorm = 128.9771, GNorm = 0.3035, lr_0 = 5.7991e-04
Loss = 1.8696e-02, PNorm = 129.0103, GNorm = 0.5830, lr_0 = 5.7952e-04
Loss = 1.8453e-02, PNorm = 129.0458, GNorm = 0.5253, lr_0 = 5.7912e-04
Loss = 1.7385e-02, PNorm = 129.0841, GNorm = 0.6531, lr_0 = 5.7872e-04
Loss = 1.8509e-02, PNorm = 129.1243, GNorm = 0.2654, lr_0 = 5.7833e-04
Loss = 2.0050e-02, PNorm = 129.1666, GNorm = 0.7015, lr_0 = 5.7793e-04
Loss = 2.0131e-02, PNorm = 129.2123, GNorm = 0.3511, lr_0 = 5.7753e-04
Loss = 1.5388e-02, PNorm = 129.2510, GNorm = 0.1409, lr_0 = 5.7714e-04
Loss = 1.9527e-02, PNorm = 129.2895, GNorm = 0.7459, lr_0 = 5.7674e-04
Loss = 1.7100e-02, PNorm = 129.3333, GNorm = 0.3932, lr_0 = 5.7635e-04
Loss = 1.9709e-02, PNorm = 129.3763, GNorm = 0.2167, lr_0 = 5.7595e-04
Loss = 1.9405e-02, PNorm = 129.4183, GNorm = 0.6236, lr_0 = 5.7556e-04
Loss = 1.9616e-02, PNorm = 129.4564, GNorm = 0.1759, lr_0 = 5.7516e-04
Loss = 2.0018e-02, PNorm = 129.5013, GNorm = 0.2591, lr_0 = 5.7477e-04
Loss = 2.0258e-02, PNorm = 129.5471, GNorm = 0.4126, lr_0 = 5.7438e-04
Loss = 1.8601e-02, PNorm = 129.5844, GNorm = 0.2390, lr_0 = 5.7398e-04
Loss = 2.2898e-02, PNorm = 129.6276, GNorm = 0.3208, lr_0 = 5.7359e-04
Loss = 1.7928e-02, PNorm = 129.6676, GNorm = 0.2656, lr_0 = 5.7320e-04
Loss = 1.9315e-02, PNorm = 129.7113, GNorm = 0.3573, lr_0 = 5.7280e-04
Loss = 1.8304e-02, PNorm = 129.7590, GNorm = 0.1846, lr_0 = 5.7241e-04
Loss = 2.1224e-02, PNorm = 129.8078, GNorm = 0.2214, lr_0 = 5.7202e-04
Loss = 2.0965e-02, PNorm = 129.8540, GNorm = 0.2068, lr_0 = 5.7163e-04
Loss = 1.9242e-02, PNorm = 129.8995, GNorm = 0.5957, lr_0 = 5.7124e-04
Loss = 1.6461e-02, PNorm = 129.9414, GNorm = 0.3637, lr_0 = 5.7084e-04
Loss = 1.8686e-02, PNorm = 129.9824, GNorm = 0.2326, lr_0 = 5.7045e-04
Loss = 1.7078e-02, PNorm = 130.0232, GNorm = 0.1988, lr_0 = 5.7006e-04
Loss = 1.6723e-02, PNorm = 130.0609, GNorm = 0.2017, lr_0 = 5.6967e-04
Loss = 2.1394e-02, PNorm = 130.1010, GNorm = 0.6932, lr_0 = 5.6928e-04
Loss = 2.1883e-02, PNorm = 130.1465, GNorm = 0.7027, lr_0 = 5.6889e-04
Loss = 2.0515e-02, PNorm = 130.1887, GNorm = 0.6803, lr_0 = 5.6850e-04
Loss = 2.0663e-02, PNorm = 130.2280, GNorm = 0.1185, lr_0 = 5.6811e-04
Loss = 1.8521e-02, PNorm = 130.2659, GNorm = 0.4970, lr_0 = 5.6772e-04
Loss = 1.8328e-02, PNorm = 130.3090, GNorm = 0.3753, lr_0 = 5.6733e-04
Loss = 1.8192e-02, PNorm = 130.3534, GNorm = 0.5252, lr_0 = 5.6695e-04
Loss = 1.7493e-02, PNorm = 130.3954, GNorm = 0.4147, lr_0 = 5.6656e-04
Loss = 1.7885e-02, PNorm = 130.4393, GNorm = 0.2285, lr_0 = 5.6617e-04
Loss = 1.8402e-02, PNorm = 130.4749, GNorm = 0.3082, lr_0 = 5.6578e-04
Loss = 2.1132e-02, PNorm = 130.5209, GNorm = 0.1999, lr_0 = 5.6539e-04
Loss = 1.6886e-02, PNorm = 130.5698, GNorm = 0.3501, lr_0 = 5.6501e-04
Loss = 1.8542e-02, PNorm = 130.6103, GNorm = 0.3334, lr_0 = 5.6462e-04
Loss = 1.7571e-02, PNorm = 130.6497, GNorm = 0.1793, lr_0 = 5.6423e-04
Loss = 2.1805e-02, PNorm = 130.6835, GNorm = 0.4325, lr_0 = 5.6385e-04
Loss = 1.6879e-02, PNorm = 130.7234, GNorm = 0.6385, lr_0 = 5.6346e-04
Loss = 2.0735e-02, PNorm = 130.7628, GNorm = 0.5041, lr_0 = 5.6307e-04
Loss = 1.8902e-02, PNorm = 130.8060, GNorm = 0.3386, lr_0 = 5.6269e-04
Loss = 2.0043e-02, PNorm = 130.8485, GNorm = 0.4237, lr_0 = 5.6230e-04
Validation mae = 0.485533
Epoch 9
Loss = 1.6714e-02, PNorm = 130.8883, GNorm = 0.1719, lr_0 = 5.6192e-04
Loss = 1.9774e-02, PNorm = 130.9209, GNorm = 0.3272, lr_0 = 5.6153e-04
Loss = 1.5274e-02, PNorm = 130.9466, GNorm = 0.1859, lr_0 = 5.6115e-04
Loss = 1.4720e-02, PNorm = 130.9780, GNorm = 0.4637, lr_0 = 5.6076e-04
Loss = 1.8347e-02, PNorm = 131.0070, GNorm = 0.3812, lr_0 = 5.6038e-04
Loss = 1.7442e-02, PNorm = 131.0396, GNorm = 0.4174, lr_0 = 5.6000e-04
Loss = 1.3605e-02, PNorm = 131.0729, GNorm = 0.2160, lr_0 = 5.5961e-04
Loss = 1.4703e-02, PNorm = 131.1032, GNorm = 0.4101, lr_0 = 5.5923e-04
Loss = 1.5860e-02, PNorm = 131.1326, GNorm = 0.2005, lr_0 = 5.5885e-04
Loss = 1.5766e-02, PNorm = 131.1628, GNorm = 0.1580, lr_0 = 5.5846e-04
Loss = 1.7320e-02, PNorm = 131.1916, GNorm = 0.5266, lr_0 = 5.5808e-04
Loss = 1.4890e-02, PNorm = 131.2221, GNorm = 0.1964, lr_0 = 5.5770e-04
Loss = 1.5132e-02, PNorm = 131.2573, GNorm = 0.7139, lr_0 = 5.5732e-04
Loss = 1.5606e-02, PNorm = 131.2876, GNorm = 0.2605, lr_0 = 5.5693e-04
Loss = 1.3319e-02, PNorm = 131.3178, GNorm = 0.2039, lr_0 = 5.5655e-04
Loss = 1.8195e-02, PNorm = 131.3514, GNorm = 0.2418, lr_0 = 5.5617e-04
Loss = 1.1933e-02, PNorm = 131.3854, GNorm = 0.2703, lr_0 = 5.5579e-04
Loss = 1.4204e-02, PNorm = 131.4076, GNorm = 0.3487, lr_0 = 5.5541e-04
Loss = 1.4970e-02, PNorm = 131.4338, GNorm = 0.1739, lr_0 = 5.5503e-04
Loss = 1.2988e-02, PNorm = 131.4524, GNorm = 0.3119, lr_0 = 5.5465e-04
Loss = 1.5029e-02, PNorm = 131.4770, GNorm = 0.1801, lr_0 = 5.5427e-04
Loss = 1.4775e-02, PNorm = 131.5004, GNorm = 0.2322, lr_0 = 5.5389e-04
Loss = 1.4518e-02, PNorm = 131.5266, GNorm = 0.4332, lr_0 = 5.5351e-04
Loss = 1.3462e-02, PNorm = 131.5580, GNorm = 0.4375, lr_0 = 5.5313e-04
Loss = 1.4152e-02, PNorm = 131.5787, GNorm = 0.1303, lr_0 = 5.5275e-04
Loss = 1.5270e-02, PNorm = 131.5994, GNorm = 0.1455, lr_0 = 5.5237e-04
Loss = 1.2772e-02, PNorm = 131.6305, GNorm = 0.2821, lr_0 = 5.5199e-04
Loss = 1.5307e-02, PNorm = 131.6605, GNorm = 0.6735, lr_0 = 5.5162e-04
Loss = 1.2096e-02, PNorm = 131.6919, GNorm = 0.2623, lr_0 = 5.5124e-04
Loss = 1.5035e-02, PNorm = 131.7191, GNorm = 0.2735, lr_0 = 5.5086e-04
Loss = 1.4155e-02, PNorm = 131.7420, GNorm = 0.2941, lr_0 = 5.5048e-04
Loss = 1.3815e-02, PNorm = 131.7706, GNorm = 0.2450, lr_0 = 5.5011e-04
Loss = 1.5173e-02, PNorm = 131.7993, GNorm = 0.2490, lr_0 = 5.4973e-04
Loss = 1.2369e-02, PNorm = 131.8288, GNorm = 0.1713, lr_0 = 5.4935e-04
Loss = 1.6183e-02, PNorm = 131.8640, GNorm = 0.5402, lr_0 = 5.4898e-04
Loss = 1.5924e-02, PNorm = 131.8977, GNorm = 0.3526, lr_0 = 5.4860e-04
Loss = 1.3821e-02, PNorm = 131.9279, GNorm = 0.2046, lr_0 = 5.4822e-04
Loss = 1.1639e-02, PNorm = 131.9519, GNorm = 0.3041, lr_0 = 5.4785e-04
Loss = 1.4064e-02, PNorm = 131.9722, GNorm = 0.5979, lr_0 = 5.4747e-04
Loss = 1.3392e-02, PNorm = 132.0032, GNorm = 0.3769, lr_0 = 5.4710e-04
Loss = 1.4719e-02, PNorm = 132.0373, GNorm = 0.6312, lr_0 = 5.4672e-04
Loss = 1.3489e-02, PNorm = 132.0688, GNorm = 0.2553, lr_0 = 5.4635e-04
Loss = 1.4942e-02, PNorm = 132.1005, GNorm = 0.2634, lr_0 = 5.4597e-04
Loss = 1.5550e-02, PNorm = 132.1328, GNorm = 0.1539, lr_0 = 5.4560e-04
Loss = 1.3371e-02, PNorm = 132.1644, GNorm = 0.2903, lr_0 = 5.4523e-04
Loss = 1.4744e-02, PNorm = 132.1880, GNorm = 0.1711, lr_0 = 5.4485e-04
Loss = 1.5369e-02, PNorm = 132.2219, GNorm = 0.3141, lr_0 = 5.4448e-04
Loss = 1.4664e-02, PNorm = 132.2524, GNorm = 0.6775, lr_0 = 5.4411e-04
Loss = 1.5688e-02, PNorm = 132.2896, GNorm = 0.3284, lr_0 = 5.4373e-04
Loss = 1.5373e-02, PNorm = 132.3281, GNorm = 0.3038, lr_0 = 5.4336e-04
Loss = 1.8299e-02, PNorm = 132.3582, GNorm = 0.4470, lr_0 = 5.4299e-04
Loss = 1.6619e-02, PNorm = 132.3860, GNorm = 0.1738, lr_0 = 5.4262e-04
Loss = 1.5601e-02, PNorm = 132.4196, GNorm = 0.2782, lr_0 = 5.4225e-04
Loss = 1.7163e-02, PNorm = 132.4510, GNorm = 0.5246, lr_0 = 5.4187e-04
Loss = 1.6778e-02, PNorm = 132.4847, GNorm = 0.3967, lr_0 = 5.4150e-04
Loss = 1.4356e-02, PNorm = 132.5278, GNorm = 0.2242, lr_0 = 5.4113e-04
Loss = 1.5176e-02, PNorm = 132.5661, GNorm = 0.2658, lr_0 = 5.4076e-04
Loss = 1.7081e-02, PNorm = 132.6031, GNorm = 0.4095, lr_0 = 5.4039e-04
Loss = 1.6954e-02, PNorm = 132.6330, GNorm = 0.3731, lr_0 = 5.4002e-04
Loss = 1.2980e-02, PNorm = 132.6605, GNorm = 0.2261, lr_0 = 5.3965e-04
Loss = 1.7094e-02, PNorm = 132.6949, GNorm = 0.3480, lr_0 = 5.3928e-04
Loss = 1.6377e-02, PNorm = 132.7362, GNorm = 0.5849, lr_0 = 5.3891e-04
Loss = 1.6777e-02, PNorm = 132.7716, GNorm = 0.2547, lr_0 = 5.3854e-04
Loss = 1.5994e-02, PNorm = 132.8101, GNorm = 0.7560, lr_0 = 5.3817e-04
Loss = 1.5396e-02, PNorm = 132.8481, GNorm = 0.2436, lr_0 = 5.3781e-04
Loss = 1.9489e-02, PNorm = 132.8761, GNorm = 0.2884, lr_0 = 5.3744e-04
Loss = 1.5937e-02, PNorm = 132.9079, GNorm = 0.3930, lr_0 = 5.3707e-04
Loss = 1.4634e-02, PNorm = 132.9372, GNorm = 0.2290, lr_0 = 5.3670e-04
Loss = 1.3979e-02, PNorm = 132.9692, GNorm = 0.2251, lr_0 = 5.3633e-04
Loss = 1.5023e-02, PNorm = 133.0017, GNorm = 0.2894, lr_0 = 5.3597e-04
Loss = 1.3531e-02, PNorm = 133.0394, GNorm = 0.5117, lr_0 = 5.3560e-04
Loss = 1.4695e-02, PNorm = 133.0740, GNorm = 0.3851, lr_0 = 5.3523e-04
Loss = 1.3337e-02, PNorm = 133.1058, GNorm = 0.4039, lr_0 = 5.3486e-04
Loss = 1.5237e-02, PNorm = 133.1376, GNorm = 0.2879, lr_0 = 5.3450e-04
Loss = 1.5445e-02, PNorm = 133.1685, GNorm = 0.1382, lr_0 = 5.3413e-04
Loss = 1.3599e-02, PNorm = 133.2051, GNorm = 0.5433, lr_0 = 5.3377e-04
Loss = 1.4969e-02, PNorm = 133.2375, GNorm = 0.2432, lr_0 = 5.3340e-04
Loss = 1.6818e-02, PNorm = 133.2751, GNorm = 0.2702, lr_0 = 5.3304e-04
Loss = 1.8639e-02, PNorm = 133.3092, GNorm = 0.2022, lr_0 = 5.3267e-04
Loss = 1.3629e-02, PNorm = 133.3497, GNorm = 0.3310, lr_0 = 5.3231e-04
Loss = 1.5852e-02, PNorm = 133.3882, GNorm = 0.2978, lr_0 = 5.3194e-04
Loss = 1.7296e-02, PNorm = 133.4199, GNorm = 0.4440, lr_0 = 5.3158e-04
Loss = 1.0829e-02, PNorm = 133.4466, GNorm = 0.2787, lr_0 = 5.3121e-04
Loss = 1.2827e-02, PNorm = 133.4759, GNorm = 0.1653, lr_0 = 5.3085e-04
Loss = 1.6933e-02, PNorm = 133.5092, GNorm = 0.3071, lr_0 = 5.3048e-04
Loss = 1.4145e-02, PNorm = 133.5454, GNorm = 0.4786, lr_0 = 5.3012e-04
Loss = 1.6882e-02, PNorm = 133.5741, GNorm = 0.2894, lr_0 = 5.2976e-04
Loss = 1.5479e-02, PNorm = 133.6062, GNorm = 0.1608, lr_0 = 5.2939e-04
Loss = 1.4315e-02, PNorm = 133.6383, GNorm = 0.5934, lr_0 = 5.2903e-04
Loss = 1.5063e-02, PNorm = 133.6765, GNorm = 0.5120, lr_0 = 5.2867e-04
Loss = 1.6861e-02, PNorm = 133.7096, GNorm = 0.5362, lr_0 = 5.2831e-04
Loss = 1.4879e-02, PNorm = 133.7457, GNorm = 0.2038, lr_0 = 5.2795e-04
Loss = 1.6423e-02, PNorm = 133.7810, GNorm = 0.4161, lr_0 = 5.2758e-04
Loss = 1.5621e-02, PNorm = 133.8133, GNorm = 1.0482, lr_0 = 5.2722e-04
Loss = 1.7002e-02, PNorm = 133.8530, GNorm = 0.3569, lr_0 = 5.2686e-04
Loss = 1.4924e-02, PNorm = 133.8851, GNorm = 0.3356, lr_0 = 5.2650e-04
Loss = 1.5343e-02, PNorm = 133.9215, GNorm = 0.3207, lr_0 = 5.2614e-04
Loss = 1.8130e-02, PNorm = 133.9548, GNorm = 0.7097, lr_0 = 5.2578e-04
Loss = 1.2685e-02, PNorm = 133.9812, GNorm = 0.2923, lr_0 = 5.2542e-04
Loss = 1.7850e-02, PNorm = 134.0119, GNorm = 0.2320, lr_0 = 5.2506e-04
Loss = 2.0114e-02, PNorm = 134.0422, GNorm = 0.4882, lr_0 = 5.2470e-04
Loss = 1.6225e-02, PNorm = 134.0796, GNorm = 0.3621, lr_0 = 5.2434e-04
Loss = 1.5933e-02, PNorm = 134.1243, GNorm = 0.1984, lr_0 = 5.2398e-04
Loss = 1.5702e-02, PNorm = 134.1599, GNorm = 0.5014, lr_0 = 5.2362e-04
Loss = 1.4986e-02, PNorm = 134.1946, GNorm = 0.2548, lr_0 = 5.2326e-04
Loss = 1.7319e-02, PNorm = 134.2260, GNorm = 0.6008, lr_0 = 5.2290e-04
Loss = 1.6925e-02, PNorm = 134.2599, GNorm = 0.5502, lr_0 = 5.2255e-04
Loss = 1.4180e-02, PNorm = 134.2956, GNorm = 0.1731, lr_0 = 5.2219e-04
Loss = 1.2622e-02, PNorm = 134.3271, GNorm = 0.3975, lr_0 = 5.2183e-04
Loss = 1.4726e-02, PNorm = 134.3603, GNorm = 0.4513, lr_0 = 5.2147e-04
Loss = 1.5656e-02, PNorm = 134.3849, GNorm = 0.4382, lr_0 = 5.2112e-04
Loss = 1.4592e-02, PNorm = 134.4207, GNorm = 0.1905, lr_0 = 5.2076e-04
Loss = 1.2942e-02, PNorm = 134.4522, GNorm = 0.2732, lr_0 = 5.2040e-04
Loss = 1.6546e-02, PNorm = 134.4860, GNorm = 0.5727, lr_0 = 5.2005e-04
Loss = 1.5892e-02, PNorm = 134.5175, GNorm = 0.4036, lr_0 = 5.1969e-04
Loss = 1.2397e-02, PNorm = 134.5527, GNorm = 0.1421, lr_0 = 5.1933e-04
Loss = 1.4584e-02, PNorm = 134.5886, GNorm = 0.2984, lr_0 = 5.1898e-04
Loss = 1.4620e-02, PNorm = 134.6248, GNorm = 0.8308, lr_0 = 5.1862e-04
Loss = 1.5027e-02, PNorm = 134.6609, GNorm = 0.2102, lr_0 = 5.1827e-04
Loss = 1.4668e-02, PNorm = 134.6992, GNorm = 0.3988, lr_0 = 5.1791e-04
Validation mae = 0.484825
Epoch 10
Loss = 1.3317e-02, PNorm = 134.7251, GNorm = 0.4194, lr_0 = 5.1756e-04
Loss = 1.3580e-02, PNorm = 134.7499, GNorm = 0.1627, lr_0 = 5.1720e-04
Loss = 1.1800e-02, PNorm = 134.7701, GNorm = 0.3776, lr_0 = 5.1685e-04
Loss = 1.2230e-02, PNorm = 134.7918, GNorm = 0.4004, lr_0 = 5.1649e-04
Loss = 1.4453e-02, PNorm = 134.8127, GNorm = 0.4764, lr_0 = 5.1614e-04
Loss = 1.4543e-02, PNorm = 134.8401, GNorm = 0.2153, lr_0 = 5.1579e-04
Loss = 1.2134e-02, PNorm = 134.8634, GNorm = 0.4773, lr_0 = 5.1543e-04
Loss = 1.2840e-02, PNorm = 134.8923, GNorm = 0.4180, lr_0 = 5.1508e-04
Loss = 1.2358e-02, PNorm = 134.9136, GNorm = 0.3523, lr_0 = 5.1473e-04
Loss = 1.1516e-02, PNorm = 134.9363, GNorm = 0.1075, lr_0 = 5.1437e-04
Loss = 1.1356e-02, PNorm = 134.9602, GNorm = 0.3065, lr_0 = 5.1402e-04
Loss = 1.1354e-02, PNorm = 134.9801, GNorm = 0.2776, lr_0 = 5.1367e-04
Loss = 1.3396e-02, PNorm = 135.0008, GNorm = 0.1888, lr_0 = 5.1332e-04
Loss = 1.0688e-02, PNorm = 135.0270, GNorm = 0.5679, lr_0 = 5.1297e-04
Loss = 1.4520e-02, PNorm = 135.0483, GNorm = 0.3744, lr_0 = 5.1262e-04
Loss = 1.2609e-02, PNorm = 135.0656, GNorm = 0.2679, lr_0 = 5.1226e-04
Loss = 1.5785e-02, PNorm = 135.0875, GNorm = 0.3793, lr_0 = 5.1191e-04
Loss = 1.1729e-02, PNorm = 135.1125, GNorm = 0.3190, lr_0 = 5.1156e-04
Loss = 1.4183e-02, PNorm = 135.1398, GNorm = 0.1402, lr_0 = 5.1121e-04
Loss = 1.0792e-02, PNorm = 135.1738, GNorm = 0.1462, lr_0 = 5.1086e-04
Loss = 1.1446e-02, PNorm = 135.2006, GNorm = 0.1742, lr_0 = 5.1051e-04
Loss = 1.1022e-02, PNorm = 135.2245, GNorm = 0.2997, lr_0 = 5.1016e-04
Loss = 1.3173e-02, PNorm = 135.2463, GNorm = 0.1918, lr_0 = 5.0981e-04
Loss = 1.1253e-02, PNorm = 135.2677, GNorm = 0.1627, lr_0 = 5.0946e-04
Loss = 1.1498e-02, PNorm = 135.2877, GNorm = 0.3770, lr_0 = 5.0911e-04
Loss = 1.3158e-02, PNorm = 135.3132, GNorm = 0.2561, lr_0 = 5.0877e-04
Loss = 1.2387e-02, PNorm = 135.3413, GNorm = 0.1693, lr_0 = 5.0842e-04
Loss = 1.1276e-02, PNorm = 135.3705, GNorm = 0.2281, lr_0 = 5.0807e-04
Loss = 1.2287e-02, PNorm = 135.3941, GNorm = 0.1671, lr_0 = 5.0772e-04
Loss = 1.1623e-02, PNorm = 135.4195, GNorm = 0.3480, lr_0 = 5.0737e-04
Loss = 1.5584e-02, PNorm = 135.4452, GNorm = 0.4897, lr_0 = 5.0703e-04
Loss = 1.5059e-02, PNorm = 135.4661, GNorm = 0.5283, lr_0 = 5.0668e-04
Loss = 1.2276e-02, PNorm = 135.4921, GNorm = 0.4169, lr_0 = 5.0633e-04
Loss = 1.0526e-02, PNorm = 135.5168, GNorm = 0.3089, lr_0 = 5.0598e-04
Loss = 1.0440e-02, PNorm = 135.5426, GNorm = 0.1779, lr_0 = 5.0564e-04
Loss = 1.1399e-02, PNorm = 135.5678, GNorm = 0.4252, lr_0 = 5.0529e-04
Loss = 1.2415e-02, PNorm = 135.5958, GNorm = 0.2310, lr_0 = 5.0494e-04
Loss = 1.2000e-02, PNorm = 135.6194, GNorm = 0.1832, lr_0 = 5.0460e-04
Loss = 1.5506e-02, PNorm = 135.6454, GNorm = 0.2556, lr_0 = 5.0425e-04
Loss = 1.1397e-02, PNorm = 135.6663, GNorm = 0.2693, lr_0 = 5.0391e-04
Loss = 1.2145e-02, PNorm = 135.6859, GNorm = 0.2034, lr_0 = 5.0356e-04
Loss = 1.0241e-02, PNorm = 135.7082, GNorm = 0.1783, lr_0 = 5.0322e-04
Loss = 1.0649e-02, PNorm = 135.7324, GNorm = 0.1164, lr_0 = 5.0287e-04
Loss = 1.1205e-02, PNorm = 135.7541, GNorm = 0.3640, lr_0 = 5.0253e-04
Loss = 8.6328e-03, PNorm = 135.7790, GNorm = 0.3797, lr_0 = 5.0218e-04
Loss = 9.8047e-03, PNorm = 135.8005, GNorm = 0.1400, lr_0 = 5.0184e-04
Loss = 1.3569e-02, PNorm = 135.8237, GNorm = 0.3673, lr_0 = 5.0150e-04
Loss = 1.2233e-02, PNorm = 135.8489, GNorm = 0.1706, lr_0 = 5.0115e-04
Loss = 1.2730e-02, PNorm = 135.8768, GNorm = 0.4295, lr_0 = 5.0081e-04
Loss = 1.4654e-02, PNorm = 135.9052, GNorm = 0.3792, lr_0 = 5.0047e-04
Loss = 1.1833e-02, PNorm = 135.9289, GNorm = 0.3293, lr_0 = 5.0012e-04
Loss = 1.1371e-02, PNorm = 135.9590, GNorm = 0.1925, lr_0 = 4.9978e-04
Loss = 1.3158e-02, PNorm = 135.9814, GNorm = 0.3282, lr_0 = 4.9944e-04
Loss = 1.2894e-02, PNorm = 136.0155, GNorm = 0.3475, lr_0 = 4.9910e-04
Loss = 1.3528e-02, PNorm = 136.0426, GNorm = 0.1175, lr_0 = 4.9875e-04
Loss = 1.0256e-02, PNorm = 136.0735, GNorm = 0.2023, lr_0 = 4.9841e-04
Loss = 1.2891e-02, PNorm = 136.0967, GNorm = 0.4835, lr_0 = 4.9807e-04
Loss = 1.2249e-02, PNorm = 136.1225, GNorm = 0.2146, lr_0 = 4.9773e-04
Loss = 1.1285e-02, PNorm = 136.1481, GNorm = 0.2220, lr_0 = 4.9739e-04
Loss = 1.1765e-02, PNorm = 136.1773, GNorm = 0.2868, lr_0 = 4.9705e-04
Loss = 1.3193e-02, PNorm = 136.2036, GNorm = 0.4064, lr_0 = 4.9671e-04
Loss = 1.2726e-02, PNorm = 136.2328, GNorm = 0.1207, lr_0 = 4.9637e-04
Loss = 1.3516e-02, PNorm = 136.2555, GNorm = 0.2419, lr_0 = 4.9603e-04
Loss = 1.2199e-02, PNorm = 136.2831, GNorm = 0.2312, lr_0 = 4.9569e-04
Loss = 1.1791e-02, PNorm = 136.3132, GNorm = 0.2468, lr_0 = 4.9535e-04
Loss = 1.1315e-02, PNorm = 136.3390, GNorm = 0.3864, lr_0 = 4.9501e-04
Loss = 1.1604e-02, PNorm = 136.3573, GNorm = 0.2487, lr_0 = 4.9467e-04
Loss = 1.0849e-02, PNorm = 136.3848, GNorm = 0.7171, lr_0 = 4.9433e-04
Loss = 1.1052e-02, PNorm = 136.4175, GNorm = 0.4845, lr_0 = 4.9399e-04
Loss = 1.1404e-02, PNorm = 136.4480, GNorm = 0.2969, lr_0 = 4.9365e-04
Loss = 1.2216e-02, PNorm = 136.4742, GNorm = 0.4682, lr_0 = 4.9332e-04
Loss = 1.3505e-02, PNorm = 136.5027, GNorm = 0.2342, lr_0 = 4.9298e-04
Loss = 1.2199e-02, PNorm = 136.5385, GNorm = 0.4023, lr_0 = 4.9264e-04
Loss = 1.1801e-02, PNorm = 136.5675, GNorm = 0.2411, lr_0 = 4.9230e-04
Loss = 1.0955e-02, PNorm = 136.5926, GNorm = 0.1589, lr_0 = 4.9197e-04
Loss = 1.3164e-02, PNorm = 136.6190, GNorm = 0.2814, lr_0 = 4.9163e-04
Loss = 1.1865e-02, PNorm = 136.6375, GNorm = 0.5155, lr_0 = 4.9129e-04
Loss = 1.6488e-02, PNorm = 136.6625, GNorm = 0.1721, lr_0 = 4.9095e-04
Loss = 1.1679e-02, PNorm = 136.6896, GNorm = 0.4267, lr_0 = 4.9062e-04
Loss = 1.1196e-02, PNorm = 136.7174, GNorm = 0.1379, lr_0 = 4.9028e-04
Loss = 1.0733e-02, PNorm = 136.7474, GNorm = 0.3252, lr_0 = 4.8995e-04
Loss = 1.1395e-02, PNorm = 136.7760, GNorm = 0.2980, lr_0 = 4.8961e-04
Loss = 1.1281e-02, PNorm = 136.8023, GNorm = 0.3212, lr_0 = 4.8928e-04
Loss = 1.5720e-02, PNorm = 136.8357, GNorm = 0.3354, lr_0 = 4.8894e-04
Loss = 1.1565e-02, PNorm = 136.8656, GNorm = 0.3723, lr_0 = 4.8861e-04
Loss = 1.1022e-02, PNorm = 136.8881, GNorm = 0.2765, lr_0 = 4.8827e-04
Loss = 1.3659e-02, PNorm = 136.9165, GNorm = 0.4746, lr_0 = 4.8794e-04
Loss = 1.3009e-02, PNorm = 136.9468, GNorm = 0.2691, lr_0 = 4.8760e-04
Loss = 1.1759e-02, PNorm = 136.9744, GNorm = 0.1662, lr_0 = 4.8727e-04
Loss = 1.1503e-02, PNorm = 137.0046, GNorm = 0.2608, lr_0 = 4.8693e-04
Loss = 1.1285e-02, PNorm = 137.0334, GNorm = 0.1725, lr_0 = 4.8660e-04
Loss = 1.0717e-02, PNorm = 137.0580, GNorm = 0.6226, lr_0 = 4.8627e-04
Loss = 1.2758e-02, PNorm = 137.0856, GNorm = 0.1699, lr_0 = 4.8593e-04
Loss = 1.4128e-02, PNorm = 137.1135, GNorm = 0.3910, lr_0 = 4.8560e-04
Loss = 1.3062e-02, PNorm = 137.1410, GNorm = 0.3893, lr_0 = 4.8527e-04
Loss = 1.2166e-02, PNorm = 137.1667, GNorm = 0.4468, lr_0 = 4.8494e-04
Loss = 1.1173e-02, PNorm = 137.1965, GNorm = 0.4521, lr_0 = 4.8460e-04
Loss = 1.2679e-02, PNorm = 137.2201, GNorm = 0.4653, lr_0 = 4.8427e-04
Loss = 1.3951e-02, PNorm = 137.2431, GNorm = 0.1127, lr_0 = 4.8394e-04
Loss = 1.3171e-02, PNorm = 137.2786, GNorm = 0.2902, lr_0 = 4.8361e-04
Loss = 1.0894e-02, PNorm = 137.3157, GNorm = 0.3300, lr_0 = 4.8328e-04
Loss = 1.2112e-02, PNorm = 137.3443, GNorm = 0.2458, lr_0 = 4.8295e-04
Loss = 1.1467e-02, PNorm = 137.3727, GNorm = 0.5169, lr_0 = 4.8262e-04
Loss = 1.1771e-02, PNorm = 137.3999, GNorm = 0.2551, lr_0 = 4.8228e-04
Loss = 1.0745e-02, PNorm = 137.4241, GNorm = 0.4281, lr_0 = 4.8195e-04
Loss = 1.3801e-02, PNorm = 137.4514, GNorm = 0.1730, lr_0 = 4.8162e-04
Loss = 1.3652e-02, PNorm = 137.4781, GNorm = 0.2695, lr_0 = 4.8129e-04
Loss = 1.4902e-02, PNorm = 137.5116, GNorm = 0.3102, lr_0 = 4.8096e-04
Loss = 1.3652e-02, PNorm = 137.5409, GNorm = 0.5598, lr_0 = 4.8064e-04
Loss = 1.2539e-02, PNorm = 137.5747, GNorm = 0.7528, lr_0 = 4.8031e-04
Loss = 1.4382e-02, PNorm = 137.6049, GNorm = 1.0136, lr_0 = 4.7998e-04
Loss = 1.3583e-02, PNorm = 137.6367, GNorm = 0.2175, lr_0 = 4.7965e-04
Loss = 1.1605e-02, PNorm = 137.6684, GNorm = 0.2875, lr_0 = 4.7932e-04
Loss = 1.2558e-02, PNorm = 137.7006, GNorm = 0.1668, lr_0 = 4.7899e-04
Loss = 1.3515e-02, PNorm = 137.7332, GNorm = 0.1744, lr_0 = 4.7866e-04
Loss = 1.0330e-02, PNorm = 137.7632, GNorm = 0.2190, lr_0 = 4.7833e-04
Loss = 1.4797e-02, PNorm = 137.7878, GNorm = 0.2525, lr_0 = 4.7801e-04
Loss = 1.2741e-02, PNorm = 137.8163, GNorm = 0.2521, lr_0 = 4.7768e-04
Loss = 1.1483e-02, PNorm = 137.8461, GNorm = 0.2905, lr_0 = 4.7735e-04
Loss = 1.2248e-02, PNorm = 137.8754, GNorm = 0.2658, lr_0 = 4.7703e-04
Validation mae = 0.482345
Epoch 11
Loss = 1.0885e-02, PNorm = 137.8952, GNorm = 0.2575, lr_0 = 4.7670e-04
Loss = 9.7581e-03, PNorm = 137.9149, GNorm = 0.3435, lr_0 = 4.7637e-04
Loss = 1.3829e-02, PNorm = 137.9362, GNorm = 0.2648, lr_0 = 4.7605e-04
Loss = 1.1281e-02, PNorm = 137.9562, GNorm = 0.3022, lr_0 = 4.7572e-04
Loss = 1.0151e-02, PNorm = 137.9789, GNorm = 0.7913, lr_0 = 4.7539e-04
Loss = 1.0746e-02, PNorm = 138.0046, GNorm = 0.2448, lr_0 = 4.7507e-04
Loss = 1.1778e-02, PNorm = 138.0220, GNorm = 0.4099, lr_0 = 4.7474e-04
Loss = 1.1940e-02, PNorm = 138.0447, GNorm = 0.2682, lr_0 = 4.7442e-04
Loss = 1.0333e-02, PNorm = 138.0624, GNorm = 0.3141, lr_0 = 4.7409e-04
Loss = 1.0856e-02, PNorm = 138.0820, GNorm = 0.2771, lr_0 = 4.7377e-04
Loss = 8.6415e-03, PNorm = 138.1043, GNorm = 0.5851, lr_0 = 4.7344e-04
Loss = 1.0309e-02, PNorm = 138.1257, GNorm = 0.2134, lr_0 = 4.7312e-04
Loss = 1.0116e-02, PNorm = 138.1494, GNorm = 0.1146, lr_0 = 4.7279e-04
Loss = 1.1454e-02, PNorm = 138.1707, GNorm = 0.2062, lr_0 = 4.7247e-04
Loss = 9.3161e-03, PNorm = 138.1930, GNorm = 0.1621, lr_0 = 4.7215e-04
Loss = 1.2288e-02, PNorm = 138.2138, GNorm = 0.4239, lr_0 = 4.7182e-04
Loss = 9.8818e-03, PNorm = 138.2358, GNorm = 0.7917, lr_0 = 4.7150e-04
Loss = 1.1243e-02, PNorm = 138.2560, GNorm = 0.1887, lr_0 = 4.7118e-04
Loss = 9.1257e-03, PNorm = 138.2780, GNorm = 0.3605, lr_0 = 4.7085e-04
Loss = 1.0628e-02, PNorm = 138.2985, GNorm = 0.1823, lr_0 = 4.7053e-04
Loss = 1.0273e-02, PNorm = 138.3207, GNorm = 0.1532, lr_0 = 4.7021e-04
Loss = 1.0424e-02, PNorm = 138.3457, GNorm = 0.2700, lr_0 = 4.6989e-04
Loss = 1.1200e-02, PNorm = 138.3649, GNorm = 0.2516, lr_0 = 4.6957e-04
Loss = 1.1683e-02, PNorm = 138.3841, GNorm = 0.2812, lr_0 = 4.6924e-04
Loss = 1.1101e-02, PNorm = 138.4058, GNorm = 0.2194, lr_0 = 4.6892e-04
Loss = 9.1963e-03, PNorm = 138.4309, GNorm = 0.2503, lr_0 = 4.6860e-04
Loss = 1.1964e-02, PNorm = 138.4543, GNorm = 0.3108, lr_0 = 4.6828e-04
Loss = 9.9353e-03, PNorm = 138.4794, GNorm = 0.3307, lr_0 = 4.6796e-04
Loss = 8.7212e-03, PNorm = 138.5026, GNorm = 0.1311, lr_0 = 4.6764e-04
Loss = 8.0205e-03, PNorm = 138.5242, GNorm = 0.2379, lr_0 = 4.6732e-04
Loss = 9.7961e-03, PNorm = 138.5492, GNorm = 0.1764, lr_0 = 4.6700e-04
Loss = 9.3852e-03, PNorm = 138.5730, GNorm = 0.3091, lr_0 = 4.6668e-04
Loss = 9.4817e-03, PNorm = 138.5953, GNorm = 0.2204, lr_0 = 4.6636e-04
Loss = 1.0318e-02, PNorm = 138.6156, GNorm = 0.1725, lr_0 = 4.6604e-04
Loss = 1.1484e-02, PNorm = 138.6363, GNorm = 0.5997, lr_0 = 4.6572e-04
Loss = 1.0799e-02, PNorm = 138.6569, GNorm = 0.4249, lr_0 = 4.6540e-04
Loss = 1.0314e-02, PNorm = 138.6835, GNorm = 0.1802, lr_0 = 4.6508e-04
Loss = 8.8588e-03, PNorm = 138.7113, GNorm = 0.2232, lr_0 = 4.6476e-04
Loss = 1.1810e-02, PNorm = 138.7316, GNorm = 0.5024, lr_0 = 4.6445e-04
Loss = 8.5505e-03, PNorm = 138.7467, GNorm = 0.3129, lr_0 = 4.6413e-04
Loss = 1.0439e-02, PNorm = 138.7599, GNorm = 0.2705, lr_0 = 4.6381e-04
Loss = 9.1581e-03, PNorm = 138.7828, GNorm = 0.1760, lr_0 = 4.6349e-04
Loss = 1.4973e-02, PNorm = 138.8045, GNorm = 0.2665, lr_0 = 4.6317e-04
Loss = 1.2305e-02, PNorm = 138.8337, GNorm = 0.8856, lr_0 = 4.6286e-04
Loss = 9.1976e-03, PNorm = 138.8550, GNorm = 0.3828, lr_0 = 4.6254e-04
Loss = 1.0118e-02, PNorm = 138.8791, GNorm = 0.3141, lr_0 = 4.6222e-04
Loss = 1.0817e-02, PNorm = 138.9076, GNorm = 0.1773, lr_0 = 4.6191e-04
Loss = 1.0723e-02, PNorm = 138.9341, GNorm = 0.5926, lr_0 = 4.6159e-04
Loss = 1.1479e-02, PNorm = 138.9565, GNorm = 0.4221, lr_0 = 4.6127e-04
Loss = 9.0397e-03, PNorm = 138.9794, GNorm = 0.2447, lr_0 = 4.6096e-04
Loss = 9.2456e-03, PNorm = 139.0045, GNorm = 0.3892, lr_0 = 4.6064e-04
Loss = 9.3169e-03, PNorm = 139.0312, GNorm = 0.3151, lr_0 = 4.6033e-04
Loss = 9.1276e-03, PNorm = 139.0532, GNorm = 0.1410, lr_0 = 4.6001e-04
Loss = 1.0023e-02, PNorm = 139.0759, GNorm = 0.1311, lr_0 = 4.5970e-04
Loss = 9.6032e-03, PNorm = 139.0999, GNorm = 0.3451, lr_0 = 4.5938e-04
Loss = 1.2253e-02, PNorm = 139.1173, GNorm = 0.1133, lr_0 = 4.5907e-04
Loss = 1.1798e-02, PNorm = 139.1422, GNorm = 0.3091, lr_0 = 4.5875e-04
Loss = 9.1735e-03, PNorm = 139.1672, GNorm = 0.2964, lr_0 = 4.5844e-04
Loss = 1.0189e-02, PNorm = 139.1885, GNorm = 0.2484, lr_0 = 4.5812e-04
Loss = 1.3399e-02, PNorm = 139.2068, GNorm = 0.3742, lr_0 = 4.5781e-04
Loss = 1.0447e-02, PNorm = 139.2277, GNorm = 0.1556, lr_0 = 4.5750e-04
Loss = 1.0388e-02, PNorm = 139.2516, GNorm = 0.1572, lr_0 = 4.5718e-04
Loss = 1.0203e-02, PNorm = 139.2778, GNorm = 0.2241, lr_0 = 4.5687e-04
Loss = 9.2124e-03, PNorm = 139.3011, GNorm = 0.1214, lr_0 = 4.5656e-04
Loss = 1.1369e-02, PNorm = 139.3248, GNorm = 0.5321, lr_0 = 4.5624e-04
Loss = 9.6043e-03, PNorm = 139.3446, GNorm = 0.5582, lr_0 = 4.5593e-04
Loss = 1.0684e-02, PNorm = 139.3680, GNorm = 0.1562, lr_0 = 4.5562e-04
Loss = 1.1892e-02, PNorm = 139.3952, GNorm = 0.6687, lr_0 = 4.5531e-04
Loss = 1.1251e-02, PNorm = 139.4177, GNorm = 0.3268, lr_0 = 4.5499e-04
Loss = 9.3318e-03, PNorm = 139.4382, GNorm = 0.4691, lr_0 = 4.5468e-04
Loss = 9.7852e-03, PNorm = 139.4641, GNorm = 0.5922, lr_0 = 4.5437e-04
Loss = 1.1655e-02, PNorm = 139.4948, GNorm = 0.2404, lr_0 = 4.5406e-04
Loss = 1.1234e-02, PNorm = 139.5195, GNorm = 0.2935, lr_0 = 4.5375e-04
Loss = 1.0248e-02, PNorm = 139.5441, GNorm = 0.0899, lr_0 = 4.5344e-04
Loss = 9.9486e-03, PNorm = 139.5654, GNorm = 0.1537, lr_0 = 4.5313e-04
Loss = 9.2300e-03, PNorm = 139.5840, GNorm = 0.4741, lr_0 = 4.5282e-04
Loss = 9.3764e-03, PNorm = 139.6065, GNorm = 0.2255, lr_0 = 4.5251e-04
Loss = 1.1710e-02, PNorm = 139.6285, GNorm = 0.4907, lr_0 = 4.5220e-04
Loss = 1.0159e-02, PNorm = 139.6563, GNorm = 0.3823, lr_0 = 4.5189e-04
Loss = 8.4581e-03, PNorm = 139.6810, GNorm = 0.1506, lr_0 = 4.5158e-04
Loss = 1.2895e-02, PNorm = 139.7064, GNorm = 0.3456, lr_0 = 4.5127e-04
Loss = 1.1489e-02, PNorm = 139.7297, GNorm = 0.2198, lr_0 = 4.5096e-04
Loss = 1.1094e-02, PNorm = 139.7532, GNorm = 0.3522, lr_0 = 4.5065e-04
Loss = 9.7729e-03, PNorm = 139.7753, GNorm = 0.3698, lr_0 = 4.5034e-04
Loss = 1.1509e-02, PNorm = 139.8004, GNorm = 0.3287, lr_0 = 4.5003e-04
Loss = 8.0461e-03, PNorm = 139.8247, GNorm = 0.3794, lr_0 = 4.4972e-04
Loss = 1.0110e-02, PNorm = 139.8529, GNorm = 0.2020, lr_0 = 4.4942e-04
Loss = 9.3867e-03, PNorm = 139.8810, GNorm = 0.2047, lr_0 = 4.4911e-04
Loss = 9.4288e-03, PNorm = 139.9035, GNorm = 0.6576, lr_0 = 4.4880e-04
Loss = 1.0391e-02, PNorm = 139.9278, GNorm = 0.1597, lr_0 = 4.4849e-04
Loss = 1.0970e-02, PNorm = 139.9493, GNorm = 0.4085, lr_0 = 4.4819e-04
Loss = 1.0209e-02, PNorm = 139.9755, GNorm = 0.1360, lr_0 = 4.4788e-04
Loss = 1.1843e-02, PNorm = 140.0027, GNorm = 0.2698, lr_0 = 4.4757e-04
Loss = 1.0397e-02, PNorm = 140.0310, GNorm = 0.3936, lr_0 = 4.4727e-04
Loss = 9.2976e-03, PNorm = 140.0583, GNorm = 0.4290, lr_0 = 4.4696e-04
Loss = 7.8656e-03, PNorm = 140.0858, GNorm = 0.2477, lr_0 = 4.4665e-04
Loss = 1.0109e-02, PNorm = 140.1124, GNorm = 0.3259, lr_0 = 4.4635e-04
Loss = 1.0961e-02, PNorm = 140.1393, GNorm = 0.1740, lr_0 = 4.4604e-04
Loss = 1.1566e-02, PNorm = 140.1647, GNorm = 0.5908, lr_0 = 4.4574e-04
Loss = 1.0993e-02, PNorm = 140.1922, GNorm = 0.1533, lr_0 = 4.4543e-04
Loss = 1.0270e-02, PNorm = 140.2186, GNorm = 0.4427, lr_0 = 4.4513e-04
Loss = 9.4261e-03, PNorm = 140.2433, GNorm = 0.5356, lr_0 = 4.4482e-04
Loss = 1.0834e-02, PNorm = 140.2736, GNorm = 0.1651, lr_0 = 4.4452e-04
Loss = 1.2029e-02, PNorm = 140.3012, GNorm = 0.5196, lr_0 = 4.4421e-04
Loss = 1.1455e-02, PNorm = 140.3220, GNorm = 0.3833, lr_0 = 4.4391e-04
Loss = 1.2059e-02, PNorm = 140.3453, GNorm = 0.2542, lr_0 = 4.4360e-04
Loss = 1.1802e-02, PNorm = 140.3740, GNorm = 0.2302, lr_0 = 4.4330e-04
Loss = 9.6022e-03, PNorm = 140.4043, GNorm = 0.3486, lr_0 = 4.4299e-04
Loss = 8.5978e-03, PNorm = 140.4335, GNorm = 0.3231, lr_0 = 4.4269e-04
Loss = 8.4046e-03, PNorm = 140.4590, GNorm = 0.1172, lr_0 = 4.4239e-04
Loss = 9.2420e-03, PNorm = 140.4828, GNorm = 0.3467, lr_0 = 4.4209e-04
Loss = 1.0497e-02, PNorm = 140.5009, GNorm = 0.4481, lr_0 = 4.4178e-04
Loss = 9.8804e-03, PNorm = 140.5235, GNorm = 0.1281, lr_0 = 4.4148e-04
Loss = 9.6720e-03, PNorm = 140.5486, GNorm = 0.1912, lr_0 = 4.4118e-04
Loss = 1.3845e-02, PNorm = 140.5750, GNorm = 0.2738, lr_0 = 4.4088e-04
Loss = 9.8385e-03, PNorm = 140.5990, GNorm = 0.1641, lr_0 = 4.4057e-04
Loss = 9.9136e-03, PNorm = 140.6189, GNorm = 0.3668, lr_0 = 4.4027e-04
Loss = 9.9219e-03, PNorm = 140.6433, GNorm = 0.3321, lr_0 = 4.3997e-04
Loss = 1.1908e-02, PNorm = 140.6642, GNorm = 0.5089, lr_0 = 4.3967e-04
Loss = 9.9679e-03, PNorm = 140.6928, GNorm = 0.3607, lr_0 = 4.3937e-04
Validation mae = 0.482638
Epoch 12
Loss = 9.3713e-03, PNorm = 140.7107, GNorm = 0.1513, lr_0 = 4.3907e-04
Loss = 8.1676e-03, PNorm = 140.7257, GNorm = 0.0999, lr_0 = 4.3877e-04
Loss = 9.1128e-03, PNorm = 140.7432, GNorm = 0.1782, lr_0 = 4.3846e-04
Loss = 7.8350e-03, PNorm = 140.7601, GNorm = 0.0889, lr_0 = 4.3816e-04
Loss = 1.0465e-02, PNorm = 140.7759, GNorm = 0.3588, lr_0 = 4.3786e-04
Loss = 8.3934e-03, PNorm = 140.7904, GNorm = 0.4191, lr_0 = 4.3756e-04
Loss = 8.0079e-03, PNorm = 140.8065, GNorm = 0.1701, lr_0 = 4.3726e-04
Loss = 8.3660e-03, PNorm = 140.8225, GNorm = 0.1321, lr_0 = 4.3696e-04
Loss = 9.1624e-03, PNorm = 140.8411, GNorm = 0.5329, lr_0 = 4.3667e-04
Loss = 9.8936e-03, PNorm = 140.8527, GNorm = 0.7418, lr_0 = 4.3637e-04
Loss = 8.3730e-03, PNorm = 140.8698, GNorm = 0.4714, lr_0 = 4.3607e-04
Loss = 7.0577e-03, PNorm = 140.8875, GNorm = 0.3324, lr_0 = 4.3577e-04
Loss = 9.4706e-03, PNorm = 140.9062, GNorm = 0.1979, lr_0 = 4.3547e-04
Loss = 7.9804e-03, PNorm = 140.9270, GNorm = 0.1790, lr_0 = 4.3517e-04
Loss = 9.1152e-03, PNorm = 140.9446, GNorm = 0.2412, lr_0 = 4.3487e-04
Loss = 9.7877e-03, PNorm = 140.9653, GNorm = 0.2634, lr_0 = 4.3458e-04
Loss = 8.8626e-03, PNorm = 140.9857, GNorm = 0.0976, lr_0 = 4.3428e-04
Loss = 9.3960e-03, PNorm = 141.0031, GNorm = 0.2982, lr_0 = 4.3398e-04
Loss = 8.0678e-03, PNorm = 141.0243, GNorm = 0.3912, lr_0 = 4.3368e-04
Loss = 6.6721e-03, PNorm = 141.0399, GNorm = 0.2121, lr_0 = 4.3339e-04
Loss = 8.5286e-03, PNorm = 141.0542, GNorm = 0.2260, lr_0 = 4.3309e-04
Loss = 7.2191e-03, PNorm = 141.0663, GNorm = 0.2037, lr_0 = 4.3279e-04
Loss = 8.0394e-03, PNorm = 141.0796, GNorm = 0.2547, lr_0 = 4.3250e-04
Loss = 8.3833e-03, PNorm = 141.0975, GNorm = 0.5288, lr_0 = 4.3220e-04
Loss = 8.0098e-03, PNorm = 141.1160, GNorm = 0.1093, lr_0 = 4.3190e-04
Loss = 9.0403e-03, PNorm = 141.1358, GNorm = 0.1372, lr_0 = 4.3161e-04
Loss = 7.1831e-03, PNorm = 141.1550, GNorm = 0.1830, lr_0 = 4.3131e-04
Loss = 8.1511e-03, PNorm = 141.1759, GNorm = 0.2938, lr_0 = 4.3102e-04
Loss = 9.3186e-03, PNorm = 141.1960, GNorm = 0.0959, lr_0 = 4.3072e-04
Loss = 8.1708e-03, PNorm = 141.2144, GNorm = 0.1532, lr_0 = 4.3043e-04
Loss = 8.7928e-03, PNorm = 141.2366, GNorm = 0.1601, lr_0 = 4.3013e-04
Loss = 8.0978e-03, PNorm = 141.2553, GNorm = 0.1909, lr_0 = 4.2984e-04
Loss = 8.9799e-03, PNorm = 141.2686, GNorm = 0.1010, lr_0 = 4.2954e-04
Loss = 9.6701e-03, PNorm = 141.2896, GNorm = 0.3142, lr_0 = 4.2925e-04
Loss = 1.1027e-02, PNorm = 141.3086, GNorm = 0.2833, lr_0 = 4.2895e-04
Loss = 8.2583e-03, PNorm = 141.3231, GNorm = 0.4456, lr_0 = 4.2866e-04
Loss = 7.4395e-03, PNorm = 141.3425, GNorm = 0.2450, lr_0 = 4.2837e-04
Loss = 7.3587e-03, PNorm = 141.3596, GNorm = 0.1911, lr_0 = 4.2807e-04
Loss = 7.7613e-03, PNorm = 141.3805, GNorm = 0.1241, lr_0 = 4.2778e-04
Loss = 7.4748e-03, PNorm = 141.3993, GNorm = 0.4417, lr_0 = 4.2749e-04
Loss = 6.8317e-03, PNorm = 141.4191, GNorm = 0.4596, lr_0 = 4.2719e-04
Loss = 6.3209e-03, PNorm = 141.4367, GNorm = 0.1769, lr_0 = 4.2690e-04
Loss = 8.3222e-03, PNorm = 141.4547, GNorm = 0.2985, lr_0 = 4.2661e-04
Loss = 1.1727e-02, PNorm = 141.4747, GNorm = 0.1827, lr_0 = 4.2632e-04
Loss = 7.8521e-03, PNorm = 141.4952, GNorm = 0.2109, lr_0 = 4.2602e-04
Loss = 9.5222e-03, PNorm = 141.5151, GNorm = 0.1078, lr_0 = 4.2573e-04
Loss = 7.5952e-03, PNorm = 141.5347, GNorm = 0.2063, lr_0 = 4.2544e-04
Loss = 8.4684e-03, PNorm = 141.5553, GNorm = 0.1558, lr_0 = 4.2515e-04
Loss = 8.2268e-03, PNorm = 141.5757, GNorm = 0.2689, lr_0 = 4.2486e-04
Loss = 8.9676e-03, PNorm = 141.5947, GNorm = 0.1869, lr_0 = 4.2457e-04
Loss = 7.9855e-03, PNorm = 141.6138, GNorm = 0.1420, lr_0 = 4.2428e-04
Loss = 9.5596e-03, PNorm = 141.6365, GNorm = 0.2016, lr_0 = 4.2399e-04
Loss = 7.0118e-03, PNorm = 141.6543, GNorm = 0.1703, lr_0 = 4.2370e-04
Loss = 7.4195e-03, PNorm = 141.6711, GNorm = 0.0913, lr_0 = 4.2340e-04
Loss = 8.2092e-03, PNorm = 141.6912, GNorm = 0.1602, lr_0 = 4.2311e-04
Loss = 7.4549e-03, PNorm = 141.7089, GNorm = 0.4163, lr_0 = 4.2283e-04
Loss = 7.3405e-03, PNorm = 141.7281, GNorm = 0.2340, lr_0 = 4.2254e-04
Loss = 8.1008e-03, PNorm = 141.7468, GNorm = 0.3098, lr_0 = 4.2225e-04
Loss = 8.6572e-03, PNorm = 141.7640, GNorm = 0.2146, lr_0 = 4.2196e-04
Loss = 8.2166e-03, PNorm = 141.7803, GNorm = 0.1313, lr_0 = 4.2167e-04
Loss = 8.0012e-03, PNorm = 141.7986, GNorm = 0.2031, lr_0 = 4.2138e-04
Loss = 9.3198e-03, PNorm = 141.8178, GNorm = 0.2573, lr_0 = 4.2109e-04
Loss = 9.6317e-03, PNorm = 141.8354, GNorm = 0.1614, lr_0 = 4.2080e-04
Loss = 8.8389e-03, PNorm = 141.8559, GNorm = 0.3521, lr_0 = 4.2051e-04
Loss = 8.0293e-03, PNorm = 141.8735, GNorm = 0.3131, lr_0 = 4.2023e-04
Loss = 8.5804e-03, PNorm = 141.8920, GNorm = 0.1552, lr_0 = 4.1994e-04
Loss = 8.4838e-03, PNorm = 141.9125, GNorm = 0.1286, lr_0 = 4.1965e-04
Loss = 8.9543e-03, PNorm = 141.9307, GNorm = 0.7429, lr_0 = 4.1936e-04
Loss = 9.1822e-03, PNorm = 141.9583, GNorm = 0.1887, lr_0 = 4.1907e-04
Loss = 9.7344e-03, PNorm = 141.9810, GNorm = 0.3468, lr_0 = 4.1879e-04
Loss = 9.4657e-03, PNorm = 142.0027, GNorm = 0.1273, lr_0 = 4.1850e-04
Loss = 7.5913e-03, PNorm = 142.0260, GNorm = 0.2480, lr_0 = 4.1821e-04
Loss = 8.9943e-03, PNorm = 142.0504, GNorm = 0.1396, lr_0 = 4.1793e-04
Loss = 9.9018e-03, PNorm = 142.0740, GNorm = 0.2187, lr_0 = 4.1764e-04
Loss = 8.4458e-03, PNorm = 142.0979, GNorm = 0.3691, lr_0 = 4.1736e-04
Loss = 7.8328e-03, PNorm = 142.1168, GNorm = 0.1250, lr_0 = 4.1707e-04
Loss = 9.1958e-03, PNorm = 142.1363, GNorm = 0.2304, lr_0 = 4.1678e-04
Loss = 7.7209e-03, PNorm = 142.1624, GNorm = 0.1234, lr_0 = 4.1650e-04
Loss = 9.1516e-03, PNorm = 142.1847, GNorm = 0.1588, lr_0 = 4.1621e-04
Loss = 7.8314e-03, PNorm = 142.2025, GNorm = 0.1239, lr_0 = 4.1593e-04
Loss = 8.0163e-03, PNorm = 142.2214, GNorm = 0.4252, lr_0 = 4.1564e-04
Loss = 1.0166e-02, PNorm = 142.2390, GNorm = 0.5953, lr_0 = 4.1536e-04
Loss = 9.1410e-03, PNorm = 142.2623, GNorm = 0.0946, lr_0 = 4.1507e-04
Loss = 1.0570e-02, PNorm = 142.2803, GNorm = 0.3961, lr_0 = 4.1479e-04
Loss = 8.1036e-03, PNorm = 142.2930, GNorm = 0.1387, lr_0 = 4.1450e-04
Loss = 1.0741e-02, PNorm = 142.3097, GNorm = 0.1041, lr_0 = 4.1422e-04
Loss = 9.7281e-03, PNorm = 142.3330, GNorm = 0.2183, lr_0 = 4.1394e-04
Loss = 9.1920e-03, PNorm = 142.3536, GNorm = 0.4071, lr_0 = 4.1365e-04
Loss = 9.0498e-03, PNorm = 142.3744, GNorm = 0.3669, lr_0 = 4.1337e-04
Loss = 1.0459e-02, PNorm = 142.3959, GNorm = 0.6102, lr_0 = 4.1309e-04
Loss = 1.0059e-02, PNorm = 142.4214, GNorm = 0.3630, lr_0 = 4.1280e-04
Loss = 9.7699e-03, PNorm = 142.4430, GNorm = 0.7646, lr_0 = 4.1252e-04
Loss = 8.3622e-03, PNorm = 142.4623, GNorm = 0.2614, lr_0 = 4.1224e-04
Loss = 8.9534e-03, PNorm = 142.4842, GNorm = 0.3933, lr_0 = 4.1196e-04
Loss = 8.6421e-03, PNorm = 142.5085, GNorm = 0.2207, lr_0 = 4.1167e-04
Loss = 7.6390e-03, PNorm = 142.5317, GNorm = 0.1224, lr_0 = 4.1139e-04
Loss = 1.3741e-02, PNorm = 142.5530, GNorm = 0.2249, lr_0 = 4.1111e-04
Loss = 7.6300e-03, PNorm = 142.5776, GNorm = 0.4158, lr_0 = 4.1083e-04
Loss = 1.0744e-02, PNorm = 142.5996, GNorm = 0.4647, lr_0 = 4.1055e-04
Loss = 1.2377e-02, PNorm = 142.6221, GNorm = 0.2563, lr_0 = 4.1027e-04
Loss = 8.6500e-03, PNorm = 142.6474, GNorm = 0.3129, lr_0 = 4.0998e-04
Loss = 8.8280e-03, PNorm = 142.6697, GNorm = 0.3995, lr_0 = 4.0970e-04
Loss = 9.1495e-03, PNorm = 142.6940, GNorm = 0.2142, lr_0 = 4.0942e-04
Loss = 7.6297e-03, PNorm = 142.7119, GNorm = 0.3104, lr_0 = 4.0914e-04
Loss = 7.7480e-03, PNorm = 142.7293, GNorm = 0.2517, lr_0 = 4.0886e-04
Loss = 8.5558e-03, PNorm = 142.7436, GNorm = 0.1280, lr_0 = 4.0858e-04
Loss = 9.4550e-03, PNorm = 142.7631, GNorm = 0.5824, lr_0 = 4.0830e-04
Loss = 7.7053e-03, PNorm = 142.7819, GNorm = 0.9035, lr_0 = 4.0802e-04
Loss = 9.5897e-03, PNorm = 142.8073, GNorm = 0.6128, lr_0 = 4.0774e-04
Loss = 9.0993e-03, PNorm = 142.8252, GNorm = 0.4450, lr_0 = 4.0746e-04
Loss = 1.2616e-02, PNorm = 142.8449, GNorm = 0.2609, lr_0 = 4.0718e-04
Loss = 1.0365e-02, PNorm = 142.8703, GNorm = 1.1828, lr_0 = 4.0691e-04
Loss = 9.6942e-03, PNorm = 142.8912, GNorm = 0.0893, lr_0 = 4.0663e-04
Loss = 8.3705e-03, PNorm = 142.9118, GNorm = 0.3725, lr_0 = 4.0635e-04
Loss = 1.0233e-02, PNorm = 142.9341, GNorm = 0.3968, lr_0 = 4.0607e-04
Loss = 8.0292e-03, PNorm = 142.9555, GNorm = 0.1668, lr_0 = 4.0579e-04
Loss = 1.3035e-02, PNorm = 142.9757, GNorm = 0.2840, lr_0 = 4.0551e-04
Loss = 8.9854e-03, PNorm = 142.9973, GNorm = 0.2921, lr_0 = 4.0524e-04
Loss = 9.2201e-03, PNorm = 143.0199, GNorm = 0.2851, lr_0 = 4.0496e-04
Loss = 8.8009e-03, PNorm = 143.0381, GNorm = 0.4532, lr_0 = 4.0468e-04
Validation mae = 0.482171
Epoch 13
Loss = 5.9326e-03, PNorm = 143.0537, GNorm = 0.3786, lr_0 = 4.0440e-04
Loss = 7.8235e-03, PNorm = 143.0671, GNorm = 0.0816, lr_0 = 4.0413e-04
Loss = 7.4484e-03, PNorm = 143.0830, GNorm = 0.1415, lr_0 = 4.0385e-04
Loss = 9.2654e-03, PNorm = 143.0986, GNorm = 0.3405, lr_0 = 4.0357e-04
Loss = 9.7493e-03, PNorm = 143.1155, GNorm = 0.1800, lr_0 = 4.0330e-04
Loss = 8.4078e-03, PNorm = 143.1351, GNorm = 0.3622, lr_0 = 4.0302e-04
Loss = 6.7776e-03, PNorm = 143.1470, GNorm = 0.1876, lr_0 = 4.0274e-04
Loss = 7.5754e-03, PNorm = 143.1628, GNorm = 0.2394, lr_0 = 4.0247e-04
Loss = 7.1799e-03, PNorm = 143.1787, GNorm = 0.3561, lr_0 = 4.0219e-04
Loss = 7.6107e-03, PNorm = 143.1955, GNorm = 0.1028, lr_0 = 4.0192e-04
Loss = 6.9090e-03, PNorm = 143.2071, GNorm = 0.2851, lr_0 = 4.0164e-04
Loss = 6.9183e-03, PNorm = 143.2221, GNorm = 0.3107, lr_0 = 4.0137e-04
Loss = 9.0522e-03, PNorm = 143.2361, GNorm = 0.1385, lr_0 = 4.0109e-04
Loss = 7.5597e-03, PNorm = 143.2535, GNorm = 0.3910, lr_0 = 4.0082e-04
Loss = 8.9808e-03, PNorm = 143.2680, GNorm = 0.2580, lr_0 = 4.0054e-04
Loss = 7.3614e-03, PNorm = 143.2816, GNorm = 0.3496, lr_0 = 4.0027e-04
Loss = 6.7294e-03, PNorm = 143.2935, GNorm = 0.2240, lr_0 = 3.9999e-04
Loss = 7.9396e-03, PNorm = 143.3112, GNorm = 0.1349, lr_0 = 3.9972e-04
Loss = 5.1760e-03, PNorm = 143.3224, GNorm = 0.0888, lr_0 = 3.9945e-04
Loss = 7.7193e-03, PNorm = 143.3366, GNorm = 0.2803, lr_0 = 3.9917e-04
Loss = 7.8389e-03, PNorm = 143.3514, GNorm = 0.1954, lr_0 = 3.9890e-04
Loss = 8.5076e-03, PNorm = 143.3669, GNorm = 0.4022, lr_0 = 3.9863e-04
Loss = 7.3801e-03, PNorm = 143.3876, GNorm = 0.2996, lr_0 = 3.9835e-04
Loss = 6.9405e-03, PNorm = 143.4045, GNorm = 0.1031, lr_0 = 3.9808e-04
Loss = 6.1936e-03, PNorm = 143.4193, GNorm = 0.3398, lr_0 = 3.9781e-04
Loss = 7.4922e-03, PNorm = 143.4312, GNorm = 0.0861, lr_0 = 3.9753e-04
Loss = 8.5427e-03, PNorm = 143.4499, GNorm = 0.5629, lr_0 = 3.9726e-04
Loss = 8.1610e-03, PNorm = 143.4692, GNorm = 0.1616, lr_0 = 3.9699e-04
Loss = 6.5674e-03, PNorm = 143.4859, GNorm = 0.1553, lr_0 = 3.9672e-04
Loss = 8.1286e-03, PNorm = 143.5010, GNorm = 0.0839, lr_0 = 3.9645e-04
Loss = 7.2853e-03, PNorm = 143.5156, GNorm = 0.2609, lr_0 = 3.9617e-04
Loss = 7.0071e-03, PNorm = 143.5290, GNorm = 0.2238, lr_0 = 3.9590e-04
Loss = 8.6118e-03, PNorm = 143.5475, GNorm = 0.3521, lr_0 = 3.9563e-04
Loss = 6.4799e-03, PNorm = 143.5654, GNorm = 0.1157, lr_0 = 3.9536e-04
Loss = 6.0947e-03, PNorm = 143.5797, GNorm = 0.1074, lr_0 = 3.9509e-04
Loss = 8.8466e-03, PNorm = 143.5951, GNorm = 0.1930, lr_0 = 3.9482e-04
Loss = 6.4321e-03, PNorm = 143.6119, GNorm = 0.2418, lr_0 = 3.9455e-04
Loss = 6.3323e-03, PNorm = 143.6254, GNorm = 0.2024, lr_0 = 3.9428e-04
Loss = 7.2738e-03, PNorm = 143.6385, GNorm = 0.1044, lr_0 = 3.9401e-04
Loss = 6.4461e-03, PNorm = 143.6522, GNorm = 0.3364, lr_0 = 3.9374e-04
Loss = 7.0094e-03, PNorm = 143.6705, GNorm = 0.2499, lr_0 = 3.9347e-04
Loss = 6.6705e-03, PNorm = 143.6846, GNorm = 0.2311, lr_0 = 3.9320e-04
Loss = 6.7392e-03, PNorm = 143.6996, GNorm = 0.2088, lr_0 = 3.9293e-04
Loss = 7.7394e-03, PNorm = 143.7154, GNorm = 0.4518, lr_0 = 3.9266e-04
Loss = 7.3098e-03, PNorm = 143.7319, GNorm = 0.2009, lr_0 = 3.9239e-04
Loss = 8.3094e-03, PNorm = 143.7488, GNorm = 0.1705, lr_0 = 3.9212e-04
Loss = 7.2594e-03, PNorm = 143.7648, GNorm = 0.2473, lr_0 = 3.9185e-04
Loss = 6.3136e-03, PNorm = 143.7816, GNorm = 0.2136, lr_0 = 3.9159e-04
Loss = 7.9737e-03, PNorm = 143.7961, GNorm = 0.3403, lr_0 = 3.9132e-04
Loss = 7.2422e-03, PNorm = 143.8122, GNorm = 0.1663, lr_0 = 3.9105e-04
Loss = 7.9251e-03, PNorm = 143.8256, GNorm = 0.2362, lr_0 = 3.9078e-04
Loss = 7.9314e-03, PNorm = 143.8404, GNorm = 0.1951, lr_0 = 3.9051e-04
Loss = 6.8627e-03, PNorm = 143.8581, GNorm = 0.0944, lr_0 = 3.9025e-04
Loss = 7.3546e-03, PNorm = 143.8789, GNorm = 0.2986, lr_0 = 3.8998e-04
Loss = 7.6447e-03, PNorm = 143.8918, GNorm = 0.5248, lr_0 = 3.8971e-04
Loss = 6.8702e-03, PNorm = 143.9091, GNorm = 0.2146, lr_0 = 3.8945e-04
Loss = 6.8317e-03, PNorm = 143.9231, GNorm = 0.1559, lr_0 = 3.8918e-04
Loss = 6.6723e-03, PNorm = 143.9388, GNorm = 0.1897, lr_0 = 3.8891e-04
Loss = 5.8064e-03, PNorm = 143.9573, GNorm = 0.1533, lr_0 = 3.8865e-04
Loss = 8.4863e-03, PNorm = 143.9744, GNorm = 0.1310, lr_0 = 3.8838e-04
Loss = 7.8128e-03, PNorm = 143.9910, GNorm = 0.2425, lr_0 = 3.8811e-04
Loss = 6.5507e-03, PNorm = 144.0056, GNorm = 0.2138, lr_0 = 3.8785e-04
Loss = 7.6813e-03, PNorm = 144.0169, GNorm = 0.2412, lr_0 = 3.8758e-04
Loss = 7.4226e-03, PNorm = 144.0341, GNorm = 0.3333, lr_0 = 3.8732e-04
Loss = 6.0190e-03, PNorm = 144.0491, GNorm = 0.1128, lr_0 = 3.8705e-04
Loss = 6.3465e-03, PNorm = 144.0653, GNorm = 0.0923, lr_0 = 3.8679e-04
Loss = 8.5978e-03, PNorm = 144.0818, GNorm = 0.3233, lr_0 = 3.8652e-04
Loss = 6.0456e-03, PNorm = 144.0990, GNorm = 0.5391, lr_0 = 3.8626e-04
Loss = 7.6607e-03, PNorm = 144.1137, GNorm = 0.1542, lr_0 = 3.8599e-04
Loss = 6.6795e-03, PNorm = 144.1282, GNorm = 0.2351, lr_0 = 3.8573e-04
Loss = 5.7839e-03, PNorm = 144.1425, GNorm = 0.3473, lr_0 = 3.8546e-04
Loss = 8.1021e-03, PNorm = 144.1527, GNorm = 0.0647, lr_0 = 3.8520e-04
Loss = 6.9907e-03, PNorm = 144.1663, GNorm = 0.3899, lr_0 = 3.8493e-04
Loss = 7.1068e-03, PNorm = 144.1847, GNorm = 0.2628, lr_0 = 3.8467e-04
Loss = 8.2256e-03, PNorm = 144.2051, GNorm = 0.1173, lr_0 = 3.8441e-04
Loss = 6.6959e-03, PNorm = 144.2239, GNorm = 0.0797, lr_0 = 3.8414e-04
Loss = 6.4143e-03, PNorm = 144.2345, GNorm = 0.1697, lr_0 = 3.8388e-04
Loss = 6.4530e-03, PNorm = 144.2500, GNorm = 0.1936, lr_0 = 3.8362e-04
Loss = 8.5230e-03, PNorm = 144.2651, GNorm = 0.3280, lr_0 = 3.8336e-04
Loss = 5.8213e-03, PNorm = 144.2822, GNorm = 0.2091, lr_0 = 3.8309e-04
Loss = 6.6785e-03, PNorm = 144.2938, GNorm = 0.1874, lr_0 = 3.8283e-04
Loss = 7.9478e-03, PNorm = 144.3089, GNorm = 0.1321, lr_0 = 3.8257e-04
Loss = 6.0779e-03, PNorm = 144.3233, GNorm = 0.5185, lr_0 = 3.8231e-04
Loss = 9.5665e-03, PNorm = 144.3401, GNorm = 0.2386, lr_0 = 3.8204e-04
Loss = 9.1576e-03, PNorm = 144.3539, GNorm = 0.4512, lr_0 = 3.8178e-04
Loss = 8.6747e-03, PNorm = 144.3647, GNorm = 0.4150, lr_0 = 3.8152e-04
Loss = 9.4246e-03, PNorm = 144.3858, GNorm = 0.2230, lr_0 = 3.8126e-04
Loss = 7.7771e-03, PNorm = 144.4077, GNorm = 0.1105, lr_0 = 3.8100e-04
Loss = 9.1982e-03, PNorm = 144.4233, GNorm = 0.2453, lr_0 = 3.8074e-04
Loss = 8.0980e-03, PNorm = 144.4372, GNorm = 0.2634, lr_0 = 3.8048e-04
Loss = 6.3856e-03, PNorm = 144.4568, GNorm = 0.0875, lr_0 = 3.8022e-04
Loss = 6.7011e-03, PNorm = 144.4733, GNorm = 0.4627, lr_0 = 3.7995e-04
Loss = 7.5874e-03, PNorm = 144.4906, GNorm = 0.1346, lr_0 = 3.7969e-04
Loss = 6.8287e-03, PNorm = 144.5103, GNorm = 0.1610, lr_0 = 3.7943e-04
Loss = 6.1025e-03, PNorm = 144.5244, GNorm = 0.5465, lr_0 = 3.7917e-04
Loss = 8.0073e-03, PNorm = 144.5458, GNorm = 0.2070, lr_0 = 3.7891e-04
Loss = 8.2744e-03, PNorm = 144.5659, GNorm = 0.3223, lr_0 = 3.7866e-04
Loss = 9.0358e-03, PNorm = 144.5852, GNorm = 0.2864, lr_0 = 3.7840e-04
Loss = 7.0533e-03, PNorm = 144.5992, GNorm = 0.4163, lr_0 = 3.7814e-04
Loss = 7.4059e-03, PNorm = 144.6117, GNorm = 0.2895, lr_0 = 3.7788e-04
Loss = 6.7806e-03, PNorm = 144.6270, GNorm = 0.1580, lr_0 = 3.7762e-04
Loss = 9.5480e-03, PNorm = 144.6436, GNorm = 0.2627, lr_0 = 3.7736e-04
Loss = 6.9142e-03, PNorm = 144.6618, GNorm = 0.4094, lr_0 = 3.7710e-04
Loss = 8.1869e-03, PNorm = 144.6802, GNorm = 0.0836, lr_0 = 3.7684e-04
Loss = 6.3534e-03, PNorm = 144.6949, GNorm = 0.1679, lr_0 = 3.7659e-04
Loss = 9.7237e-03, PNorm = 144.7083, GNorm = 0.2900, lr_0 = 3.7633e-04
Loss = 7.6014e-03, PNorm = 144.7272, GNorm = 0.4427, lr_0 = 3.7607e-04
Loss = 6.1806e-03, PNorm = 144.7475, GNorm = 0.2987, lr_0 = 3.7581e-04
Loss = 6.3178e-03, PNorm = 144.7649, GNorm = 0.1331, lr_0 = 3.7555e-04
Loss = 7.9191e-03, PNorm = 144.7801, GNorm = 0.4889, lr_0 = 3.7530e-04
Loss = 8.2013e-03, PNorm = 144.7967, GNorm = 0.0883, lr_0 = 3.7504e-04
Loss = 6.6743e-03, PNorm = 144.8156, GNorm = 0.5193, lr_0 = 3.7478e-04
Loss = 5.3443e-03, PNorm = 144.8296, GNorm = 0.2345, lr_0 = 3.7453e-04
Loss = 6.9377e-03, PNorm = 144.8471, GNorm = 0.1044, lr_0 = 3.7427e-04
Loss = 6.4546e-03, PNorm = 144.8620, GNorm = 0.0845, lr_0 = 3.7401e-04
Loss = 7.0191e-03, PNorm = 144.8791, GNorm = 0.3085, lr_0 = 3.7376e-04
Loss = 1.0213e-02, PNorm = 144.8987, GNorm = 0.3014, lr_0 = 3.7350e-04
Loss = 8.3800e-03, PNorm = 144.9172, GNorm = 0.2377, lr_0 = 3.7325e-04
Loss = 8.3628e-03, PNorm = 144.9372, GNorm = 0.1993, lr_0 = 3.7299e-04
Loss = 6.9161e-03, PNorm = 144.9574, GNorm = 0.3365, lr_0 = 3.7273e-04
Validation mae = 0.478591
Epoch 14
Loss = 7.4005e-03, PNorm = 144.9771, GNorm = 0.2725, lr_0 = 3.7248e-04
Loss = 6.9211e-03, PNorm = 144.9888, GNorm = 0.3594, lr_0 = 3.7222e-04
Loss = 6.2379e-03, PNorm = 144.9995, GNorm = 0.1457, lr_0 = 3.7197e-04
Loss = 5.3469e-03, PNorm = 145.0082, GNorm = 0.1427, lr_0 = 3.7171e-04
Loss = 5.8996e-03, PNorm = 145.0197, GNorm = 0.0960, lr_0 = 3.7146e-04
Loss = 6.2441e-03, PNorm = 145.0303, GNorm = 0.0912, lr_0 = 3.7120e-04
Loss = 6.5738e-03, PNorm = 145.0420, GNorm = 0.1704, lr_0 = 3.7095e-04
Loss = 5.9106e-03, PNorm = 145.0560, GNorm = 0.1525, lr_0 = 3.7070e-04
Loss = 5.2668e-03, PNorm = 145.0661, GNorm = 0.3021, lr_0 = 3.7044e-04
Loss = 7.5140e-03, PNorm = 145.0739, GNorm = 0.1549, lr_0 = 3.7019e-04
Loss = 6.8217e-03, PNorm = 145.0861, GNorm = 0.3610, lr_0 = 3.6993e-04
Loss = 5.5400e-03, PNorm = 145.0969, GNorm = 0.1203, lr_0 = 3.6968e-04
Loss = 5.4801e-03, PNorm = 145.1074, GNorm = 0.1295, lr_0 = 3.6943e-04
Loss = 6.1105e-03, PNorm = 145.1180, GNorm = 0.5023, lr_0 = 3.6917e-04
Loss = 5.5093e-03, PNorm = 145.1271, GNorm = 0.0990, lr_0 = 3.6892e-04
Loss = 6.0772e-03, PNorm = 145.1419, GNorm = 0.1674, lr_0 = 3.6867e-04
Loss = 7.3530e-03, PNorm = 145.1534, GNorm = 0.1032, lr_0 = 3.6842e-04
Loss = 6.3125e-03, PNorm = 145.1602, GNorm = 0.1878, lr_0 = 3.6816e-04
Loss = 5.2810e-03, PNorm = 145.1705, GNorm = 0.3855, lr_0 = 3.6791e-04
Loss = 7.0761e-03, PNorm = 145.1838, GNorm = 0.6052, lr_0 = 3.6766e-04
Loss = 7.8858e-03, PNorm = 145.1966, GNorm = 0.1936, lr_0 = 3.6741e-04
Loss = 5.7924e-03, PNorm = 145.2084, GNorm = 0.2241, lr_0 = 3.6716e-04
Loss = 5.4728e-03, PNorm = 145.2210, GNorm = 0.0750, lr_0 = 3.6690e-04
Loss = 5.7822e-03, PNorm = 145.2352, GNorm = 0.2077, lr_0 = 3.6665e-04
Loss = 5.4367e-03, PNorm = 145.2487, GNorm = 0.1996, lr_0 = 3.6640e-04
Loss = 4.5792e-03, PNorm = 145.2600, GNorm = 0.1529, lr_0 = 3.6615e-04
Loss = 6.7162e-03, PNorm = 145.2742, GNorm = 0.1488, lr_0 = 3.6590e-04
Loss = 6.9934e-03, PNorm = 145.2888, GNorm = 0.1035, lr_0 = 3.6565e-04
Loss = 5.7141e-03, PNorm = 145.3050, GNorm = 0.2112, lr_0 = 3.6540e-04
Loss = 5.9622e-03, PNorm = 145.3206, GNorm = 0.0960, lr_0 = 3.6515e-04
Loss = 5.4977e-03, PNorm = 145.3339, GNorm = 0.3755, lr_0 = 3.6490e-04
Loss = 5.6933e-03, PNorm = 145.3430, GNorm = 0.1089, lr_0 = 3.6465e-04
Loss = 5.6302e-03, PNorm = 145.3570, GNorm = 0.0927, lr_0 = 3.6440e-04
Loss = 7.7183e-03, PNorm = 145.3708, GNorm = 0.4198, lr_0 = 3.6415e-04
Loss = 5.5694e-03, PNorm = 145.3828, GNorm = 0.1302, lr_0 = 3.6390e-04
Loss = 5.2812e-03, PNorm = 145.3925, GNorm = 0.1544, lr_0 = 3.6365e-04
Loss = 7.1463e-03, PNorm = 145.4059, GNorm = 0.2580, lr_0 = 3.6340e-04
Loss = 6.0737e-03, PNorm = 145.4212, GNorm = 0.2766, lr_0 = 3.6315e-04
Loss = 5.5331e-03, PNorm = 145.4354, GNorm = 0.2492, lr_0 = 3.6290e-04
Loss = 5.4866e-03, PNorm = 145.4499, GNorm = 0.1480, lr_0 = 3.6266e-04
Loss = 5.9316e-03, PNorm = 145.4654, GNorm = 0.1244, lr_0 = 3.6241e-04
Loss = 6.4442e-03, PNorm = 145.4753, GNorm = 0.2856, lr_0 = 3.6216e-04
Loss = 5.7143e-03, PNorm = 145.4859, GNorm = 0.2196, lr_0 = 3.6191e-04
Loss = 5.3789e-03, PNorm = 145.4938, GNorm = 0.2105, lr_0 = 3.6166e-04
Loss = 5.1142e-03, PNorm = 145.5064, GNorm = 0.4730, lr_0 = 3.6141e-04
Loss = 5.0289e-03, PNorm = 145.5181, GNorm = 0.2173, lr_0 = 3.6117e-04
Loss = 6.6122e-03, PNorm = 145.5336, GNorm = 0.5143, lr_0 = 3.6092e-04
Loss = 8.0907e-03, PNorm = 145.5517, GNorm = 0.1789, lr_0 = 3.6067e-04
Loss = 5.9477e-03, PNorm = 145.5713, GNorm = 0.2480, lr_0 = 3.6043e-04
Loss = 6.0948e-03, PNorm = 145.5848, GNorm = 0.2465, lr_0 = 3.6018e-04
Loss = 4.7222e-03, PNorm = 145.6017, GNorm = 0.1427, lr_0 = 3.5993e-04
Loss = 8.2683e-03, PNorm = 145.6135, GNorm = 0.2976, lr_0 = 3.5969e-04
Loss = 5.1875e-03, PNorm = 145.6248, GNorm = 0.2280, lr_0 = 3.5944e-04
Loss = 6.2700e-03, PNorm = 145.6372, GNorm = 0.2213, lr_0 = 3.5919e-04
Loss = 5.5392e-03, PNorm = 145.6479, GNorm = 0.1280, lr_0 = 3.5895e-04
Loss = 5.6902e-03, PNorm = 145.6600, GNorm = 0.2618, lr_0 = 3.5870e-04
Loss = 5.9171e-03, PNorm = 145.6711, GNorm = 0.2185, lr_0 = 3.5845e-04
Loss = 7.3055e-03, PNorm = 145.6832, GNorm = 0.3069, lr_0 = 3.5821e-04
Loss = 7.5845e-03, PNorm = 145.6951, GNorm = 0.2072, lr_0 = 3.5796e-04
Loss = 7.0859e-03, PNorm = 145.7084, GNorm = 0.0702, lr_0 = 3.5772e-04
Loss = 5.2773e-03, PNorm = 145.7268, GNorm = 0.1731, lr_0 = 3.5747e-04
Loss = 6.6176e-03, PNorm = 145.7456, GNorm = 0.3948, lr_0 = 3.5723e-04
Loss = 6.0354e-03, PNorm = 145.7629, GNorm = 0.1236, lr_0 = 3.5698e-04
Loss = 5.0887e-03, PNorm = 145.7763, GNorm = 0.1793, lr_0 = 3.5674e-04
Loss = 7.0908e-03, PNorm = 145.7937, GNorm = 0.1127, lr_0 = 3.5650e-04
Loss = 6.7800e-03, PNorm = 145.8074, GNorm = 0.1473, lr_0 = 3.5625e-04
Loss = 5.7854e-03, PNorm = 145.8222, GNorm = 0.4071, lr_0 = 3.5601e-04
Loss = 6.9471e-03, PNorm = 145.8352, GNorm = 0.1127, lr_0 = 3.5576e-04
Loss = 6.3057e-03, PNorm = 145.8517, GNorm = 0.2255, lr_0 = 3.5552e-04
Loss = 5.4907e-03, PNorm = 145.8656, GNorm = 0.3153, lr_0 = 3.5528e-04
Loss = 7.6288e-03, PNorm = 145.8778, GNorm = 0.2003, lr_0 = 3.5503e-04
Loss = 5.1373e-03, PNorm = 145.8905, GNorm = 0.2047, lr_0 = 3.5479e-04
Loss = 6.3511e-03, PNorm = 145.9043, GNorm = 0.3187, lr_0 = 3.5455e-04
Loss = 6.8656e-03, PNorm = 145.9172, GNorm = 0.2426, lr_0 = 3.5430e-04
Loss = 5.2664e-03, PNorm = 145.9280, GNorm = 0.2379, lr_0 = 3.5406e-04
Loss = 4.5834e-03, PNorm = 145.9408, GNorm = 0.1129, lr_0 = 3.5382e-04
Loss = 8.9065e-03, PNorm = 145.9565, GNorm = 0.2538, lr_0 = 3.5358e-04
Loss = 5.5857e-03, PNorm = 145.9749, GNorm = 0.2487, lr_0 = 3.5333e-04
Loss = 7.4296e-03, PNorm = 145.9899, GNorm = 0.3656, lr_0 = 3.5309e-04
Loss = 5.5960e-03, PNorm = 146.0052, GNorm = 0.3176, lr_0 = 3.5285e-04
Loss = 5.1014e-03, PNorm = 146.0179, GNorm = 0.4082, lr_0 = 3.5261e-04
Loss = 6.6616e-03, PNorm = 146.0255, GNorm = 0.2708, lr_0 = 3.5237e-04
Loss = 6.7333e-03, PNorm = 146.0438, GNorm = 0.4391, lr_0 = 3.5212e-04
Loss = 8.3877e-03, PNorm = 146.0563, GNorm = 0.4352, lr_0 = 3.5188e-04
Loss = 6.4200e-03, PNorm = 146.0683, GNorm = 0.2286, lr_0 = 3.5164e-04
Loss = 5.1059e-03, PNorm = 146.0805, GNorm = 0.1912, lr_0 = 3.5140e-04
Loss = 7.0898e-03, PNorm = 146.0926, GNorm = 0.3039, lr_0 = 3.5116e-04
Loss = 5.7457e-03, PNorm = 146.1048, GNorm = 0.2958, lr_0 = 3.5092e-04
Loss = 6.2656e-03, PNorm = 146.1210, GNorm = 0.2942, lr_0 = 3.5068e-04
Loss = 5.8023e-03, PNorm = 146.1366, GNorm = 0.1437, lr_0 = 3.5044e-04
Loss = 6.4520e-03, PNorm = 146.1571, GNorm = 0.2222, lr_0 = 3.5020e-04
Loss = 7.3677e-03, PNorm = 146.1709, GNorm = 0.0977, lr_0 = 3.4996e-04
Loss = 5.3549e-03, PNorm = 146.1831, GNorm = 0.2860, lr_0 = 3.4972e-04
Loss = 5.8577e-03, PNorm = 146.1975, GNorm = 0.1544, lr_0 = 3.4948e-04
Loss = 6.0022e-03, PNorm = 146.2102, GNorm = 0.2267, lr_0 = 3.4924e-04
Loss = 5.8472e-03, PNorm = 146.2246, GNorm = 0.1009, lr_0 = 3.4900e-04
Loss = 7.7923e-03, PNorm = 146.2400, GNorm = 0.3695, lr_0 = 3.4876e-04
Loss = 4.7556e-03, PNorm = 146.2517, GNorm = 0.1200, lr_0 = 3.4852e-04
Loss = 8.6726e-03, PNorm = 146.2613, GNorm = 0.2820, lr_0 = 3.4828e-04
Loss = 6.5418e-03, PNorm = 146.2763, GNorm = 0.0744, lr_0 = 3.4805e-04
Loss = 6.1680e-03, PNorm = 146.2932, GNorm = 0.1202, lr_0 = 3.4781e-04
Loss = 5.4219e-03, PNorm = 146.3065, GNorm = 0.1016, lr_0 = 3.4757e-04
Loss = 7.9773e-03, PNorm = 146.3212, GNorm = 0.2051, lr_0 = 3.4733e-04
Loss = 4.7408e-03, PNorm = 146.3337, GNorm = 0.1394, lr_0 = 3.4709e-04
Loss = 5.6901e-03, PNorm = 146.3493, GNorm = 0.3410, lr_0 = 3.4686e-04
Loss = 7.7278e-03, PNorm = 146.3642, GNorm = 0.2960, lr_0 = 3.4662e-04
Loss = 5.3158e-03, PNorm = 146.3776, GNorm = 0.1146, lr_0 = 3.4638e-04
Loss = 5.6745e-03, PNorm = 146.3880, GNorm = 0.1292, lr_0 = 3.4614e-04
Loss = 9.3068e-03, PNorm = 146.4012, GNorm = 0.4028, lr_0 = 3.4591e-04
Loss = 6.4567e-03, PNorm = 146.4166, GNorm = 0.1806, lr_0 = 3.4567e-04
Loss = 5.5059e-03, PNorm = 146.4316, GNorm = 0.2389, lr_0 = 3.4543e-04
Loss = 7.2035e-03, PNorm = 146.4448, GNorm = 0.0844, lr_0 = 3.4520e-04
Loss = 6.0043e-03, PNorm = 146.4602, GNorm = 0.1469, lr_0 = 3.4496e-04
Loss = 6.5004e-03, PNorm = 146.4800, GNorm = 0.2871, lr_0 = 3.4472e-04
Loss = 7.4480e-03, PNorm = 146.4941, GNorm = 0.2589, lr_0 = 3.4449e-04
Loss = 8.3571e-03, PNorm = 146.5120, GNorm = 0.3413, lr_0 = 3.4425e-04
Loss = 5.8247e-03, PNorm = 146.5295, GNorm = 0.1251, lr_0 = 3.4402e-04
Loss = 5.2728e-03, PNorm = 146.5412, GNorm = 0.2056, lr_0 = 3.4378e-04
Loss = 6.5356e-03, PNorm = 146.5525, GNorm = 0.1887, lr_0 = 3.4354e-04
Loss = 4.9463e-03, PNorm = 146.5646, GNorm = 0.2597, lr_0 = 3.4331e-04
Validation mae = 0.479547
Epoch 15
Loss = 4.4834e-03, PNorm = 146.5757, GNorm = 0.0865, lr_0 = 3.4307e-04
Loss = 6.2568e-03, PNorm = 146.5838, GNorm = 0.3222, lr_0 = 3.4284e-04
Loss = 6.7272e-03, PNorm = 146.5902, GNorm = 0.2750, lr_0 = 3.4260e-04
Loss = 4.6666e-03, PNorm = 146.6009, GNorm = 0.2549, lr_0 = 3.4237e-04
Loss = 5.7574e-03, PNorm = 146.6103, GNorm = 0.2193, lr_0 = 3.4213e-04
Loss = 6.0032e-03, PNorm = 146.6206, GNorm = 0.1364, lr_0 = 3.4190e-04
Loss = 5.2964e-03, PNorm = 146.6289, GNorm = 0.2578, lr_0 = 3.4167e-04
Loss = 5.0734e-03, PNorm = 146.6420, GNorm = 0.2255, lr_0 = 3.4143e-04
Loss = 5.7666e-03, PNorm = 146.6531, GNorm = 0.1140, lr_0 = 3.4120e-04
Loss = 5.3229e-03, PNorm = 146.6628, GNorm = 0.1863, lr_0 = 3.4096e-04
Loss = 4.9588e-03, PNorm = 146.6704, GNorm = 0.0669, lr_0 = 3.4073e-04
Loss = 5.1541e-03, PNorm = 146.6797, GNorm = 0.1123, lr_0 = 3.4050e-04
Loss = 5.6198e-03, PNorm = 146.6892, GNorm = 0.1091, lr_0 = 3.4026e-04
Loss = 5.6660e-03, PNorm = 146.7067, GNorm = 0.2124, lr_0 = 3.4003e-04
Loss = 4.2861e-03, PNorm = 146.7206, GNorm = 0.1427, lr_0 = 3.3980e-04
Loss = 4.3870e-03, PNorm = 146.7311, GNorm = 0.0914, lr_0 = 3.3956e-04
Loss = 6.3118e-03, PNorm = 146.7372, GNorm = 0.1972, lr_0 = 3.3933e-04
Loss = 6.6475e-03, PNorm = 146.7511, GNorm = 0.5989, lr_0 = 3.3910e-04
Loss = 5.8773e-03, PNorm = 146.7658, GNorm = 0.1176, lr_0 = 3.3887e-04
Loss = 6.8891e-03, PNorm = 146.7812, GNorm = 0.1131, lr_0 = 3.3864e-04
Loss = 6.1150e-03, PNorm = 146.7925, GNorm = 0.1104, lr_0 = 3.3840e-04
Loss = 6.3190e-03, PNorm = 146.8012, GNorm = 0.1463, lr_0 = 3.3817e-04
Loss = 4.8209e-03, PNorm = 146.8113, GNorm = 0.0862, lr_0 = 3.3794e-04
Loss = 5.4822e-03, PNorm = 146.8210, GNorm = 0.3819, lr_0 = 3.3771e-04
Loss = 4.4911e-03, PNorm = 146.8321, GNorm = 0.2243, lr_0 = 3.3748e-04
Loss = 4.6404e-03, PNorm = 146.8439, GNorm = 0.1187, lr_0 = 3.3725e-04
Loss = 5.3767e-03, PNorm = 146.8552, GNorm = 0.2077, lr_0 = 3.3701e-04
Loss = 5.7445e-03, PNorm = 146.8687, GNorm = 0.2830, lr_0 = 3.3678e-04
Loss = 4.7403e-03, PNorm = 146.8787, GNorm = 0.2600, lr_0 = 3.3655e-04
Loss = 5.1764e-03, PNorm = 146.8911, GNorm = 0.1993, lr_0 = 3.3632e-04
Loss = 5.1682e-03, PNorm = 146.9040, GNorm = 0.3144, lr_0 = 3.3609e-04
Loss = 9.9046e-03, PNorm = 146.9155, GNorm = 0.1736, lr_0 = 3.3586e-04
Loss = 7.9628e-03, PNorm = 146.9275, GNorm = 0.2344, lr_0 = 3.3563e-04
Loss = 4.9719e-03, PNorm = 146.9360, GNorm = 0.2193, lr_0 = 3.3540e-04
Loss = 8.6520e-03, PNorm = 146.9464, GNorm = 0.8839, lr_0 = 3.3517e-04
Loss = 4.5955e-03, PNorm = 146.9556, GNorm = 0.1556, lr_0 = 3.3494e-04
Loss = 6.0818e-03, PNorm = 146.9690, GNorm = 0.3272, lr_0 = 3.3471e-04
Loss = 7.3552e-03, PNorm = 146.9841, GNorm = 0.2485, lr_0 = 3.3448e-04
Loss = 4.4101e-03, PNorm = 146.9920, GNorm = 0.2899, lr_0 = 3.3425e-04
Loss = 5.3728e-03, PNorm = 147.0023, GNorm = 0.2537, lr_0 = 3.3403e-04
Loss = 4.8247e-03, PNorm = 147.0115, GNorm = 0.1895, lr_0 = 3.3380e-04
Loss = 4.2257e-03, PNorm = 147.0217, GNorm = 0.3924, lr_0 = 3.3357e-04
Loss = 7.2738e-03, PNorm = 147.0266, GNorm = 0.2105, lr_0 = 3.3334e-04
Loss = 5.5207e-03, PNorm = 147.0364, GNorm = 0.3561, lr_0 = 3.3311e-04
Loss = 4.7988e-03, PNorm = 147.0515, GNorm = 0.3158, lr_0 = 3.3288e-04
Loss = 5.6079e-03, PNorm = 147.0672, GNorm = 0.1909, lr_0 = 3.3265e-04
Loss = 6.9150e-03, PNorm = 147.0804, GNorm = 0.3517, lr_0 = 3.3243e-04
Loss = 4.8627e-03, PNorm = 147.0932, GNorm = 0.0854, lr_0 = 3.3220e-04
Loss = 4.9264e-03, PNorm = 147.1052, GNorm = 0.1047, lr_0 = 3.3197e-04
Loss = 4.5033e-03, PNorm = 147.1158, GNorm = 0.1778, lr_0 = 3.3174e-04
Loss = 5.1398e-03, PNorm = 147.1258, GNorm = 0.2363, lr_0 = 3.3152e-04
Loss = 4.8150e-03, PNorm = 147.1405, GNorm = 0.2633, lr_0 = 3.3129e-04
Loss = 4.9768e-03, PNorm = 147.1562, GNorm = 0.2790, lr_0 = 3.3106e-04
Loss = 3.9744e-03, PNorm = 147.1675, GNorm = 0.2052, lr_0 = 3.3084e-04
Loss = 4.2738e-03, PNorm = 147.1790, GNorm = 0.2390, lr_0 = 3.3061e-04
Loss = 7.1017e-03, PNorm = 147.1884, GNorm = 0.3326, lr_0 = 3.3038e-04
Loss = 6.4104e-03, PNorm = 147.2023, GNorm = 0.2617, lr_0 = 3.3016e-04
Loss = 6.4084e-03, PNorm = 147.2126, GNorm = 0.0980, lr_0 = 3.2993e-04
Loss = 4.3012e-03, PNorm = 147.2248, GNorm = 0.2176, lr_0 = 3.2970e-04
Loss = 5.0259e-03, PNorm = 147.2367, GNorm = 0.3307, lr_0 = 3.2948e-04
Loss = 6.3749e-03, PNorm = 147.2499, GNorm = 0.2171, lr_0 = 3.2925e-04
Loss = 4.8889e-03, PNorm = 147.2609, GNorm = 0.1972, lr_0 = 3.2903e-04
Loss = 7.3224e-03, PNorm = 147.2751, GNorm = 0.1435, lr_0 = 3.2880e-04
Loss = 5.1493e-03, PNorm = 147.2897, GNorm = 0.1452, lr_0 = 3.2858e-04
Loss = 5.7803e-03, PNorm = 147.3027, GNorm = 0.1698, lr_0 = 3.2835e-04
Loss = 7.5812e-03, PNorm = 147.3162, GNorm = 0.3904, lr_0 = 3.2813e-04
Loss = 6.5029e-03, PNorm = 147.3292, GNorm = 0.1982, lr_0 = 3.2790e-04
Loss = 4.5640e-03, PNorm = 147.3401, GNorm = 0.0521, lr_0 = 3.2768e-04
Loss = 5.4251e-03, PNorm = 147.3504, GNorm = 0.3903, lr_0 = 3.2745e-04
Loss = 5.3891e-03, PNorm = 147.3615, GNorm = 0.0564, lr_0 = 3.2723e-04
Loss = 4.1230e-03, PNorm = 147.3733, GNorm = 0.2851, lr_0 = 3.2700e-04
Loss = 1.0049e-02, PNorm = 147.3843, GNorm = 0.0934, lr_0 = 3.2678e-04
Loss = 5.5987e-03, PNorm = 147.4001, GNorm = 0.1873, lr_0 = 3.2656e-04
Loss = 4.8355e-03, PNorm = 147.4097, GNorm = 0.2937, lr_0 = 3.2633e-04
Loss = 4.6647e-03, PNorm = 147.4242, GNorm = 0.1847, lr_0 = 3.2611e-04
Loss = 4.9629e-03, PNorm = 147.4376, GNorm = 0.3236, lr_0 = 3.2589e-04
Loss = 4.6421e-03, PNorm = 147.4464, GNorm = 0.2373, lr_0 = 3.2566e-04
Loss = 5.6482e-03, PNorm = 147.4582, GNorm = 0.0884, lr_0 = 3.2544e-04
Loss = 5.7964e-03, PNorm = 147.4679, GNorm = 0.4825, lr_0 = 3.2522e-04
Loss = 3.9691e-03, PNorm = 147.4827, GNorm = 0.0805, lr_0 = 3.2499e-04
Loss = 4.2982e-03, PNorm = 147.4961, GNorm = 0.2231, lr_0 = 3.2477e-04
Loss = 4.5775e-03, PNorm = 147.5060, GNorm = 0.0745, lr_0 = 3.2455e-04
Loss = 4.2322e-03, PNorm = 147.5197, GNorm = 0.2138, lr_0 = 3.2433e-04
Loss = 4.6729e-03, PNorm = 147.5318, GNorm = 0.3409, lr_0 = 3.2410e-04
Loss = 7.1834e-03, PNorm = 147.5439, GNorm = 0.2346, lr_0 = 3.2388e-04
Loss = 5.6998e-03, PNorm = 147.5568, GNorm = 0.1599, lr_0 = 3.2366e-04
Loss = 4.5270e-03, PNorm = 147.5670, GNorm = 0.0684, lr_0 = 3.2344e-04
Loss = 4.3192e-03, PNorm = 147.5764, GNorm = 0.1565, lr_0 = 3.2322e-04
Loss = 6.0472e-03, PNorm = 147.5867, GNorm = 0.2112, lr_0 = 3.2300e-04
Loss = 4.9546e-03, PNorm = 147.5956, GNorm = 0.1226, lr_0 = 3.2277e-04
Loss = 6.0637e-03, PNorm = 147.6077, GNorm = 0.2803, lr_0 = 3.2255e-04
Loss = 4.4670e-03, PNorm = 147.6199, GNorm = 0.3207, lr_0 = 3.2233e-04
Loss = 4.6430e-03, PNorm = 147.6325, GNorm = 0.1773, lr_0 = 3.2211e-04
Loss = 5.8024e-03, PNorm = 147.6435, GNorm = 0.3659, lr_0 = 3.2189e-04
Loss = 6.4702e-03, PNorm = 147.6577, GNorm = 0.5331, lr_0 = 3.2167e-04
Loss = 5.0906e-03, PNorm = 147.6684, GNorm = 0.1372, lr_0 = 3.2145e-04
Loss = 5.2456e-03, PNorm = 147.6799, GNorm = 0.1315, lr_0 = 3.2123e-04
Loss = 4.3990e-03, PNorm = 147.6943, GNorm = 0.1133, lr_0 = 3.2101e-04
Loss = 4.1830e-03, PNorm = 147.7039, GNorm = 0.1482, lr_0 = 3.2079e-04
Loss = 4.4435e-03, PNorm = 147.7147, GNorm = 0.2096, lr_0 = 3.2057e-04
Loss = 4.6467e-03, PNorm = 147.7304, GNorm = 0.2530, lr_0 = 3.2035e-04
Loss = 5.4054e-03, PNorm = 147.7421, GNorm = 0.2115, lr_0 = 3.2013e-04
Loss = 4.1217e-03, PNorm = 147.7525, GNorm = 0.2591, lr_0 = 3.1991e-04
Loss = 6.9238e-03, PNorm = 147.7657, GNorm = 0.0841, lr_0 = 3.1969e-04
Loss = 5.3198e-03, PNorm = 147.7768, GNorm = 0.2052, lr_0 = 3.1947e-04
Loss = 4.6696e-03, PNorm = 147.7896, GNorm = 0.1437, lr_0 = 3.1925e-04
Loss = 6.6287e-03, PNorm = 147.8027, GNorm = 0.3026, lr_0 = 3.1904e-04
Loss = 4.9137e-03, PNorm = 147.8182, GNorm = 0.1320, lr_0 = 3.1882e-04
Loss = 4.5577e-03, PNorm = 147.8291, GNorm = 0.1476, lr_0 = 3.1860e-04
Loss = 4.1364e-03, PNorm = 147.8404, GNorm = 0.2373, lr_0 = 3.1838e-04
Loss = 4.6826e-03, PNorm = 147.8481, GNorm = 0.2559, lr_0 = 3.1816e-04
Loss = 5.5445e-03, PNorm = 147.8580, GNorm = 0.0815, lr_0 = 3.1794e-04
Loss = 7.7589e-03, PNorm = 147.8696, GNorm = 0.3366, lr_0 = 3.1773e-04
Loss = 4.4375e-03, PNorm = 147.8780, GNorm = 0.2240, lr_0 = 3.1751e-04
Loss = 5.2919e-03, PNorm = 147.8877, GNorm = 0.2056, lr_0 = 3.1729e-04
Loss = 4.8735e-03, PNorm = 147.8974, GNorm = 0.2063, lr_0 = 3.1707e-04
Loss = 4.9964e-03, PNorm = 147.9127, GNorm = 0.0611, lr_0 = 3.1686e-04
Loss = 5.4984e-03, PNorm = 147.9250, GNorm = 0.2942, lr_0 = 3.1664e-04
Loss = 5.7648e-03, PNorm = 147.9357, GNorm = 0.0582, lr_0 = 3.1642e-04
Loss = 5.4242e-03, PNorm = 147.9506, GNorm = 0.2871, lr_0 = 3.1621e-04
Validation mae = 0.479906
Epoch 16
Loss = 4.3244e-03, PNorm = 147.9582, GNorm = 0.1272, lr_0 = 3.1599e-04
Loss = 4.6753e-03, PNorm = 147.9640, GNorm = 0.2113, lr_0 = 3.1577e-04
Loss = 6.7775e-03, PNorm = 147.9683, GNorm = 0.1854, lr_0 = 3.1556e-04
Loss = 7.5290e-03, PNorm = 147.9753, GNorm = 0.2449, lr_0 = 3.1534e-04
Loss = 5.2773e-03, PNorm = 147.9863, GNorm = 0.1864, lr_0 = 3.1512e-04
Loss = 4.0292e-03, PNorm = 147.9988, GNorm = 0.1720, lr_0 = 3.1491e-04
Loss = 4.8584e-03, PNorm = 148.0089, GNorm = 0.1486, lr_0 = 3.1469e-04
Loss = 5.2366e-03, PNorm = 148.0174, GNorm = 0.2133, lr_0 = 3.1448e-04
Loss = 5.6828e-03, PNorm = 148.0267, GNorm = 0.2447, lr_0 = 3.1426e-04
Loss = 4.4824e-03, PNorm = 148.0343, GNorm = 0.6754, lr_0 = 3.1405e-04
Loss = 4.7587e-03, PNorm = 148.0421, GNorm = 0.0888, lr_0 = 3.1383e-04
Loss = 6.3299e-03, PNorm = 148.0507, GNorm = 0.2207, lr_0 = 3.1362e-04
Loss = 4.0161e-03, PNorm = 148.0603, GNorm = 0.1466, lr_0 = 3.1340e-04
Loss = 3.7415e-03, PNorm = 148.0700, GNorm = 0.1647, lr_0 = 3.1319e-04
Loss = 4.2184e-03, PNorm = 148.0806, GNorm = 0.1934, lr_0 = 3.1297e-04
Loss = 6.4101e-03, PNorm = 148.0929, GNorm = 0.0875, lr_0 = 3.1276e-04
Loss = 4.4393e-03, PNorm = 148.1033, GNorm = 0.1903, lr_0 = 3.1254e-04
Loss = 3.6968e-03, PNorm = 148.1095, GNorm = 0.1446, lr_0 = 3.1233e-04
Loss = 5.7759e-03, PNorm = 148.1181, GNorm = 0.2542, lr_0 = 3.1212e-04
Loss = 4.7896e-03, PNorm = 148.1270, GNorm = 0.4107, lr_0 = 3.1190e-04
Loss = 4.0412e-03, PNorm = 148.1401, GNorm = 0.2656, lr_0 = 3.1169e-04
Loss = 4.1932e-03, PNorm = 148.1505, GNorm = 0.0897, lr_0 = 3.1147e-04
Loss = 3.1862e-03, PNorm = 148.1613, GNorm = 0.1056, lr_0 = 3.1126e-04
Loss = 4.4361e-03, PNorm = 148.1681, GNorm = 0.1567, lr_0 = 3.1105e-04
Loss = 5.3287e-03, PNorm = 148.1767, GNorm = 0.0939, lr_0 = 3.1083e-04
Loss = 3.9771e-03, PNorm = 148.1843, GNorm = 0.1526, lr_0 = 3.1062e-04
Loss = 4.0258e-03, PNorm = 148.1938, GNorm = 0.1585, lr_0 = 3.1041e-04
Loss = 5.9887e-03, PNorm = 148.2021, GNorm = 0.2246, lr_0 = 3.1020e-04
Loss = 4.6642e-03, PNorm = 148.2126, GNorm = 0.1433, lr_0 = 3.0998e-04
Loss = 4.3406e-03, PNorm = 148.2209, GNorm = 0.2001, lr_0 = 3.0977e-04
Loss = 6.4248e-03, PNorm = 148.2306, GNorm = 0.2917, lr_0 = 3.0956e-04
Loss = 4.4470e-03, PNorm = 148.2393, GNorm = 0.1536, lr_0 = 3.0935e-04
Loss = 4.7843e-03, PNorm = 148.2464, GNorm = 0.3040, lr_0 = 3.0914e-04
Loss = 3.6623e-03, PNorm = 148.2522, GNorm = 0.1633, lr_0 = 3.0892e-04
Loss = 3.6083e-03, PNorm = 148.2625, GNorm = 0.0547, lr_0 = 3.0871e-04
Loss = 5.6763e-03, PNorm = 148.2722, GNorm = 0.2600, lr_0 = 3.0850e-04
Loss = 5.7958e-03, PNorm = 148.2799, GNorm = 0.4495, lr_0 = 3.0829e-04
Loss = 3.6268e-03, PNorm = 148.2910, GNorm = 0.2373, lr_0 = 3.0808e-04
Loss = 3.9314e-03, PNorm = 148.2986, GNorm = 0.1109, lr_0 = 3.0787e-04
Loss = 4.1728e-03, PNorm = 148.3111, GNorm = 0.1252, lr_0 = 3.0766e-04
Loss = 3.6042e-03, PNorm = 148.3181, GNorm = 0.1664, lr_0 = 3.0745e-04
Loss = 4.2444e-03, PNorm = 148.3245, GNorm = 0.0844, lr_0 = 3.0723e-04
Loss = 3.9455e-03, PNorm = 148.3349, GNorm = 0.1909, lr_0 = 3.0702e-04
Loss = 4.6777e-03, PNorm = 148.3450, GNorm = 0.2042, lr_0 = 3.0681e-04
Loss = 4.3309e-03, PNorm = 148.3506, GNorm = 0.2166, lr_0 = 3.0660e-04
Loss = 4.1036e-03, PNorm = 148.3557, GNorm = 0.0630, lr_0 = 3.0639e-04
Loss = 5.0415e-03, PNorm = 148.3657, GNorm = 0.3921, lr_0 = 3.0618e-04
Loss = 3.7539e-03, PNorm = 148.3743, GNorm = 0.3169, lr_0 = 3.0597e-04
Loss = 4.8910e-03, PNorm = 148.3845, GNorm = 0.1780, lr_0 = 3.0576e-04
Loss = 3.9950e-03, PNorm = 148.3974, GNorm = 0.1474, lr_0 = 3.0555e-04
Loss = 3.9218e-03, PNorm = 148.4074, GNorm = 0.1677, lr_0 = 3.0535e-04
Loss = 4.3518e-03, PNorm = 148.4162, GNorm = 0.3459, lr_0 = 3.0514e-04
Loss = 4.2734e-03, PNorm = 148.4241, GNorm = 0.2781, lr_0 = 3.0493e-04
Loss = 4.9545e-03, PNorm = 148.4300, GNorm = 0.1119, lr_0 = 3.0472e-04
Loss = 3.2774e-03, PNorm = 148.4369, GNorm = 0.0729, lr_0 = 3.0451e-04
Loss = 4.0917e-03, PNorm = 148.4461, GNorm = 0.4005, lr_0 = 3.0430e-04
Loss = 3.6407e-03, PNorm = 148.4535, GNorm = 0.0824, lr_0 = 3.0409e-04
Loss = 4.1842e-03, PNorm = 148.4621, GNorm = 0.2730, lr_0 = 3.0388e-04
Loss = 4.8944e-03, PNorm = 148.4749, GNorm = 0.3962, lr_0 = 3.0368e-04
Loss = 3.9145e-03, PNorm = 148.4852, GNorm = 0.2405, lr_0 = 3.0347e-04
Loss = 6.5464e-03, PNorm = 148.4964, GNorm = 0.2189, lr_0 = 3.0326e-04
Loss = 4.3767e-03, PNorm = 148.5055, GNorm = 0.3210, lr_0 = 3.0305e-04
Loss = 4.7524e-03, PNorm = 148.5114, GNorm = 0.2765, lr_0 = 3.0284e-04
Loss = 4.2883e-03, PNorm = 148.5220, GNorm = 0.0708, lr_0 = 3.0264e-04
Loss = 3.5671e-03, PNorm = 148.5319, GNorm = 0.1823, lr_0 = 3.0243e-04
Loss = 5.1920e-03, PNorm = 148.5423, GNorm = 0.1360, lr_0 = 3.0222e-04
Loss = 4.3228e-03, PNorm = 148.5513, GNorm = 0.1823, lr_0 = 3.0202e-04
Loss = 4.3678e-03, PNorm = 148.5628, GNorm = 0.1215, lr_0 = 3.0181e-04
Loss = 4.3679e-03, PNorm = 148.5699, GNorm = 0.1993, lr_0 = 3.0160e-04
Loss = 4.3454e-03, PNorm = 148.5825, GNorm = 0.0912, lr_0 = 3.0140e-04
Loss = 5.3923e-03, PNorm = 148.5914, GNorm = 0.4348, lr_0 = 3.0119e-04
Loss = 4.1509e-03, PNorm = 148.6012, GNorm = 0.2918, lr_0 = 3.0098e-04
Loss = 6.2263e-03, PNorm = 148.6090, GNorm = 0.2912, lr_0 = 3.0078e-04
Loss = 7.2874e-03, PNorm = 148.6219, GNorm = 0.2167, lr_0 = 3.0057e-04
Loss = 4.7835e-03, PNorm = 148.6336, GNorm = 0.2038, lr_0 = 3.0036e-04
Loss = 3.4342e-03, PNorm = 148.6465, GNorm = 0.1228, lr_0 = 3.0016e-04
Loss = 3.7055e-03, PNorm = 148.6576, GNorm = 0.1111, lr_0 = 2.9995e-04
Loss = 4.9930e-03, PNorm = 148.6688, GNorm = 0.1760, lr_0 = 2.9975e-04
Loss = 4.5398e-03, PNorm = 148.6761, GNorm = 0.2535, lr_0 = 2.9954e-04
Loss = 5.3990e-03, PNorm = 148.6863, GNorm = 0.4074, lr_0 = 2.9934e-04
Loss = 4.0918e-03, PNorm = 148.7009, GNorm = 0.1475, lr_0 = 2.9913e-04
Loss = 4.5473e-03, PNorm = 148.7102, GNorm = 0.1948, lr_0 = 2.9893e-04
Loss = 4.5256e-03, PNorm = 148.7202, GNorm = 0.1568, lr_0 = 2.9872e-04
Loss = 4.5900e-03, PNorm = 148.7276, GNorm = 0.1508, lr_0 = 2.9852e-04
Loss = 4.7583e-03, PNorm = 148.7358, GNorm = 0.0943, lr_0 = 2.9831e-04
Loss = 5.3281e-03, PNorm = 148.7461, GNorm = 0.2208, lr_0 = 2.9811e-04
Loss = 5.3588e-03, PNorm = 148.7580, GNorm = 0.2280, lr_0 = 2.9790e-04
Loss = 4.5617e-03, PNorm = 148.7708, GNorm = 0.2506, lr_0 = 2.9770e-04
Loss = 4.6653e-03, PNorm = 148.7794, GNorm = 0.1581, lr_0 = 2.9750e-04
Loss = 4.5421e-03, PNorm = 148.7885, GNorm = 0.2733, lr_0 = 2.9729e-04
Loss = 5.0462e-03, PNorm = 148.7998, GNorm = 0.3018, lr_0 = 2.9709e-04
Loss = 3.9250e-03, PNorm = 148.8117, GNorm = 0.1801, lr_0 = 2.9689e-04
Loss = 3.9289e-03, PNorm = 148.8186, GNorm = 0.2022, lr_0 = 2.9668e-04
Loss = 3.7779e-03, PNorm = 148.8243, GNorm = 0.1347, lr_0 = 2.9648e-04
Loss = 4.1872e-03, PNorm = 148.8303, GNorm = 0.1538, lr_0 = 2.9628e-04
Loss = 4.5546e-03, PNorm = 148.8374, GNorm = 0.3798, lr_0 = 2.9607e-04
Loss = 5.7918e-03, PNorm = 148.8426, GNorm = 0.3936, lr_0 = 2.9587e-04
Loss = 4.1476e-03, PNorm = 148.8522, GNorm = 0.3175, lr_0 = 2.9567e-04
Loss = 4.0080e-03, PNorm = 148.8607, GNorm = 0.2711, lr_0 = 2.9546e-04
Loss = 3.9138e-03, PNorm = 148.8689, GNorm = 0.2566, lr_0 = 2.9526e-04
Loss = 3.5799e-03, PNorm = 148.8773, GNorm = 0.1078, lr_0 = 2.9506e-04
Loss = 3.9727e-03, PNorm = 148.8863, GNorm = 0.0892, lr_0 = 2.9486e-04
Loss = 5.7279e-03, PNorm = 148.8944, GNorm = 0.2198, lr_0 = 2.9466e-04
Loss = 3.9041e-03, PNorm = 148.9050, GNorm = 0.3015, lr_0 = 2.9445e-04
Loss = 1.0783e-02, PNorm = 148.9146, GNorm = 0.1599, lr_0 = 2.9425e-04
Loss = 5.4328e-03, PNorm = 148.9279, GNorm = 0.1929, lr_0 = 2.9405e-04
Loss = 4.7674e-03, PNorm = 148.9409, GNorm = 0.0990, lr_0 = 2.9385e-04
Loss = 4.2534e-03, PNorm = 148.9523, GNorm = 0.1502, lr_0 = 2.9365e-04
Loss = 3.6350e-03, PNorm = 148.9625, GNorm = 0.0799, lr_0 = 2.9345e-04
Loss = 4.7895e-03, PNorm = 148.9678, GNorm = 0.2025, lr_0 = 2.9325e-04
Loss = 3.5592e-03, PNorm = 148.9762, GNorm = 0.3124, lr_0 = 2.9305e-04
Loss = 4.3943e-03, PNorm = 148.9841, GNorm = 0.1462, lr_0 = 2.9284e-04
Loss = 4.2281e-03, PNorm = 148.9954, GNorm = 0.1102, lr_0 = 2.9264e-04
Loss = 4.1143e-03, PNorm = 149.0085, GNorm = 0.2467, lr_0 = 2.9244e-04
Loss = 5.5324e-03, PNorm = 149.0177, GNorm = 0.1671, lr_0 = 2.9224e-04
Loss = 5.2860e-03, PNorm = 149.0294, GNorm = 0.1257, lr_0 = 2.9204e-04
Loss = 4.7323e-03, PNorm = 149.0391, GNorm = 0.2201, lr_0 = 2.9184e-04
Loss = 3.8485e-03, PNorm = 149.0464, GNorm = 0.2455, lr_0 = 2.9164e-04
Loss = 4.1623e-03, PNorm = 149.0565, GNorm = 0.0937, lr_0 = 2.9144e-04
Loss = 3.4857e-03, PNorm = 149.0660, GNorm = 0.1126, lr_0 = 2.9124e-04
Validation mae = 0.478053
Epoch 17
Loss = 5.0801e-03, PNorm = 149.0708, GNorm = 0.1397, lr_0 = 2.9104e-04
Loss = 4.2786e-03, PNorm = 149.0764, GNorm = 0.1142, lr_0 = 2.9084e-04
Loss = 4.4405e-03, PNorm = 149.0834, GNorm = 0.1117, lr_0 = 2.9065e-04
Loss = 4.3385e-03, PNorm = 149.0907, GNorm = 0.0714, lr_0 = 2.9045e-04
Loss = 4.7702e-03, PNorm = 149.0977, GNorm = 0.1511, lr_0 = 2.9025e-04
Loss = 4.7248e-03, PNorm = 149.1038, GNorm = 0.0488, lr_0 = 2.9005e-04
Loss = 3.6237e-03, PNorm = 149.1118, GNorm = 0.1164, lr_0 = 2.8985e-04
Loss = 3.7740e-03, PNorm = 149.1230, GNorm = 0.3336, lr_0 = 2.8965e-04
Loss = 3.9612e-03, PNorm = 149.1362, GNorm = 0.1110, lr_0 = 2.8945e-04
Loss = 4.3399e-03, PNorm = 149.1455, GNorm = 0.2714, lr_0 = 2.8925e-04
Loss = 3.7958e-03, PNorm = 149.1536, GNorm = 0.4625, lr_0 = 2.8906e-04
Loss = 3.9509e-03, PNorm = 149.1552, GNorm = 0.1888, lr_0 = 2.8886e-04
Loss = 3.3081e-03, PNorm = 149.1613, GNorm = 0.2354, lr_0 = 2.8866e-04
Loss = 3.9006e-03, PNorm = 149.1725, GNorm = 0.3091, lr_0 = 2.8846e-04
Loss = 3.3141e-03, PNorm = 149.1826, GNorm = 0.1194, lr_0 = 2.8826e-04
Loss = 3.2570e-03, PNorm = 149.1913, GNorm = 0.0959, lr_0 = 2.8807e-04
Loss = 3.1666e-03, PNorm = 149.1987, GNorm = 0.2066, lr_0 = 2.8787e-04
Loss = 3.9432e-03, PNorm = 149.2084, GNorm = 0.1550, lr_0 = 2.8767e-04
Loss = 4.4683e-03, PNorm = 149.2175, GNorm = 0.1841, lr_0 = 2.8748e-04
Loss = 3.9085e-03, PNorm = 149.2266, GNorm = 0.1357, lr_0 = 2.8728e-04
Loss = 3.9571e-03, PNorm = 149.2345, GNorm = 0.2681, lr_0 = 2.8708e-04
Loss = 3.7295e-03, PNorm = 149.2452, GNorm = 0.1325, lr_0 = 2.8689e-04
Loss = 6.0687e-03, PNorm = 149.2504, GNorm = 0.1457, lr_0 = 2.8669e-04
Loss = 4.8446e-03, PNorm = 149.2558, GNorm = 0.2537, lr_0 = 2.8649e-04
Loss = 6.1655e-03, PNorm = 149.2611, GNorm = 0.1410, lr_0 = 2.8630e-04
Loss = 3.2425e-03, PNorm = 149.2658, GNorm = 0.0772, lr_0 = 2.8610e-04
Loss = 3.7825e-03, PNorm = 149.2751, GNorm = 0.0725, lr_0 = 2.8590e-04
Loss = 3.2553e-03, PNorm = 149.2857, GNorm = 0.3834, lr_0 = 2.8571e-04
Loss = 5.4467e-03, PNorm = 149.2951, GNorm = 0.1186, lr_0 = 2.8551e-04
Loss = 6.2691e-03, PNorm = 149.3023, GNorm = 0.5476, lr_0 = 2.8532e-04
Loss = 3.5258e-03, PNorm = 149.3104, GNorm = 0.1626, lr_0 = 2.8512e-04
Loss = 4.5125e-03, PNorm = 149.3157, GNorm = 0.1239, lr_0 = 2.8493e-04
Loss = 3.5433e-03, PNorm = 149.3253, GNorm = 0.0757, lr_0 = 2.8473e-04
Loss = 4.4622e-03, PNorm = 149.3321, GNorm = 0.1766, lr_0 = 2.8454e-04
Loss = 5.3876e-03, PNorm = 149.3385, GNorm = 0.1085, lr_0 = 2.8434e-04
Loss = 3.2588e-03, PNorm = 149.3451, GNorm = 0.0721, lr_0 = 2.8415e-04
Loss = 3.2463e-03, PNorm = 149.3492, GNorm = 0.2270, lr_0 = 2.8395e-04
Loss = 3.6828e-03, PNorm = 149.3578, GNorm = 0.3711, lr_0 = 2.8376e-04
Loss = 3.3046e-03, PNorm = 149.3655, GNorm = 0.2182, lr_0 = 2.8356e-04
Loss = 5.4286e-03, PNorm = 149.3734, GNorm = 0.2628, lr_0 = 2.8337e-04
Loss = 5.6890e-03, PNorm = 149.3893, GNorm = 0.1057, lr_0 = 2.8317e-04
Loss = 4.5235e-03, PNorm = 149.4062, GNorm = 0.2293, lr_0 = 2.8298e-04
Loss = 4.0092e-03, PNorm = 149.4176, GNorm = 0.1495, lr_0 = 2.8279e-04
Loss = 2.7953e-03, PNorm = 149.4244, GNorm = 0.1695, lr_0 = 2.8259e-04
Loss = 3.0633e-03, PNorm = 149.4309, GNorm = 0.0491, lr_0 = 2.8240e-04
Loss = 3.5123e-03, PNorm = 149.4403, GNorm = 0.0721, lr_0 = 2.8221e-04
Loss = 5.2909e-03, PNorm = 149.4485, GNorm = 0.2837, lr_0 = 2.8201e-04
Loss = 4.1500e-03, PNorm = 149.4572, GNorm = 0.2106, lr_0 = 2.8182e-04
Loss = 6.4225e-03, PNorm = 149.4619, GNorm = 0.2503, lr_0 = 2.8163e-04
Loss = 3.9296e-03, PNorm = 149.4691, GNorm = 0.1819, lr_0 = 2.8143e-04
Loss = 3.2891e-03, PNorm = 149.4744, GNorm = 0.1982, lr_0 = 2.8124e-04
Loss = 7.8783e-03, PNorm = 149.4814, GNorm = 0.3010, lr_0 = 2.8105e-04
Loss = 4.5235e-03, PNorm = 149.4909, GNorm = 0.2082, lr_0 = 2.8085e-04
Loss = 4.6911e-03, PNorm = 149.4988, GNorm = 0.2180, lr_0 = 2.8066e-04
Loss = 4.1975e-03, PNorm = 149.5085, GNorm = 0.2518, lr_0 = 2.8047e-04
Loss = 3.5080e-03, PNorm = 149.5155, GNorm = 0.3221, lr_0 = 2.8028e-04
Loss = 3.4092e-03, PNorm = 149.5228, GNorm = 0.1207, lr_0 = 2.8009e-04
Loss = 3.2223e-03, PNorm = 149.5293, GNorm = 0.1608, lr_0 = 2.7989e-04
Loss = 3.9600e-03, PNorm = 149.5393, GNorm = 0.2580, lr_0 = 2.7970e-04
Loss = 2.8724e-03, PNorm = 149.5475, GNorm = 0.1892, lr_0 = 2.7951e-04
Loss = 4.0199e-03, PNorm = 149.5577, GNorm = 0.2265, lr_0 = 2.7932e-04
Loss = 2.8903e-03, PNorm = 149.5673, GNorm = 0.1539, lr_0 = 2.7913e-04
Loss = 3.4894e-03, PNorm = 149.5747, GNorm = 0.1570, lr_0 = 2.7894e-04
Loss = 4.7647e-03, PNorm = 149.5808, GNorm = 0.1345, lr_0 = 2.7875e-04
Loss = 3.3382e-03, PNorm = 149.5888, GNorm = 0.0644, lr_0 = 2.7855e-04
Loss = 4.9642e-03, PNorm = 149.5975, GNorm = 0.5695, lr_0 = 2.7836e-04
Loss = 3.5271e-03, PNorm = 149.6074, GNorm = 0.1225, lr_0 = 2.7817e-04
Loss = 4.4094e-03, PNorm = 149.6141, GNorm = 0.4131, lr_0 = 2.7798e-04
Loss = 3.4217e-03, PNorm = 149.6228, GNorm = 0.1309, lr_0 = 2.7779e-04
Loss = 2.8644e-03, PNorm = 149.6300, GNorm = 0.1350, lr_0 = 2.7760e-04
Loss = 3.4280e-03, PNorm = 149.6359, GNorm = 0.2179, lr_0 = 2.7741e-04
Loss = 3.0452e-03, PNorm = 149.6418, GNorm = 0.1703, lr_0 = 2.7722e-04
Loss = 4.6762e-03, PNorm = 149.6500, GNorm = 0.2096, lr_0 = 2.7703e-04
Loss = 4.8214e-03, PNorm = 149.6590, GNorm = 0.1311, lr_0 = 2.7684e-04
Loss = 4.9021e-03, PNorm = 149.6707, GNorm = 0.2243, lr_0 = 2.7665e-04
Loss = 4.9027e-03, PNorm = 149.6824, GNorm = 0.0969, lr_0 = 2.7646e-04
Loss = 4.1003e-03, PNorm = 149.6900, GNorm = 0.0703, lr_0 = 2.7627e-04
Loss = 3.9277e-03, PNorm = 149.6981, GNorm = 0.1095, lr_0 = 2.7608e-04
Loss = 3.8104e-03, PNorm = 149.7050, GNorm = 0.2423, lr_0 = 2.7590e-04
Loss = 4.3881e-03, PNorm = 149.7118, GNorm = 0.2654, lr_0 = 2.7571e-04
Loss = 3.6875e-03, PNorm = 149.7155, GNorm = 0.2075, lr_0 = 2.7552e-04
Loss = 3.9516e-03, PNorm = 149.7224, GNorm = 0.1048, lr_0 = 2.7533e-04
Loss = 3.4372e-03, PNorm = 149.7293, GNorm = 0.0720, lr_0 = 2.7514e-04
Loss = 2.8513e-03, PNorm = 149.7353, GNorm = 0.0942, lr_0 = 2.7495e-04
Loss = 4.5834e-03, PNorm = 149.7426, GNorm = 0.0867, lr_0 = 2.7476e-04
Loss = 4.1710e-03, PNorm = 149.7490, GNorm = 0.0541, lr_0 = 2.7457e-04
Loss = 4.9980e-03, PNorm = 149.7570, GNorm = 0.1742, lr_0 = 2.7439e-04
Loss = 3.7894e-03, PNorm = 149.7666, GNorm = 0.2047, lr_0 = 2.7420e-04
Loss = 3.8499e-03, PNorm = 149.7739, GNorm = 0.2231, lr_0 = 2.7401e-04
Loss = 3.2239e-03, PNorm = 149.7797, GNorm = 0.2161, lr_0 = 2.7382e-04
Loss = 3.7228e-03, PNorm = 149.7870, GNorm = 0.2092, lr_0 = 2.7364e-04
Loss = 3.2258e-03, PNorm = 149.7931, GNorm = 0.2250, lr_0 = 2.7345e-04
Loss = 4.7790e-03, PNorm = 149.8001, GNorm = 0.5323, lr_0 = 2.7326e-04
Loss = 4.0983e-03, PNorm = 149.8099, GNorm = 0.1067, lr_0 = 2.7307e-04
Loss = 3.3625e-03, PNorm = 149.8197, GNorm = 0.3051, lr_0 = 2.7289e-04
Loss = 4.6056e-03, PNorm = 149.8297, GNorm = 0.1555, lr_0 = 2.7270e-04
Loss = 3.6861e-03, PNorm = 149.8357, GNorm = 0.1611, lr_0 = 2.7251e-04
Loss = 4.1348e-03, PNorm = 149.8427, GNorm = 0.1015, lr_0 = 2.7233e-04
Loss = 3.2702e-03, PNorm = 149.8524, GNorm = 0.2839, lr_0 = 2.7214e-04
Loss = 6.0814e-03, PNorm = 149.8591, GNorm = 0.2414, lr_0 = 2.7195e-04
Loss = 3.1130e-03, PNorm = 149.8665, GNorm = 0.0804, lr_0 = 2.7177e-04
Loss = 7.4696e-03, PNorm = 149.8769, GNorm = 0.1628, lr_0 = 2.7158e-04
Loss = 3.9560e-03, PNorm = 149.8875, GNorm = 0.1244, lr_0 = 2.7139e-04
Loss = 3.0786e-03, PNorm = 149.8958, GNorm = 0.1374, lr_0 = 2.7121e-04
Loss = 2.8501e-03, PNorm = 149.9049, GNorm = 0.1795, lr_0 = 2.7102e-04
Loss = 3.5384e-03, PNorm = 149.9094, GNorm = 0.2633, lr_0 = 2.7084e-04
Loss = 4.3310e-03, PNorm = 149.9179, GNorm = 0.1376, lr_0 = 2.7065e-04
Loss = 4.2828e-03, PNorm = 149.9245, GNorm = 0.0718, lr_0 = 2.7047e-04
Loss = 3.7653e-03, PNorm = 149.9329, GNorm = 0.1172, lr_0 = 2.7028e-04
Loss = 2.9460e-03, PNorm = 149.9413, GNorm = 0.1794, lr_0 = 2.7010e-04
Loss = 3.9040e-03, PNorm = 149.9506, GNorm = 0.2433, lr_0 = 2.6991e-04
Loss = 3.5654e-03, PNorm = 149.9623, GNorm = 0.0723, lr_0 = 2.6973e-04
Loss = 3.2179e-03, PNorm = 149.9756, GNorm = 0.0592, lr_0 = 2.6954e-04
Loss = 3.4227e-03, PNorm = 149.9870, GNorm = 0.5209, lr_0 = 2.6936e-04
Loss = 3.8322e-03, PNorm = 149.9935, GNorm = 0.1852, lr_0 = 2.6917e-04
Loss = 3.0152e-03, PNorm = 150.0009, GNorm = 0.1288, lr_0 = 2.6899e-04
Loss = 4.5602e-03, PNorm = 150.0044, GNorm = 0.1193, lr_0 = 2.6880e-04
Loss = 2.8498e-03, PNorm = 150.0137, GNorm = 0.1125, lr_0 = 2.6862e-04
Loss = 4.5751e-03, PNorm = 150.0236, GNorm = 0.3198, lr_0 = 2.6844e-04
Loss = 3.3525e-03, PNorm = 150.0293, GNorm = 0.4059, lr_0 = 2.6825e-04
Validation mae = 0.477143
Epoch 18
Loss = 3.7586e-03, PNorm = 150.0339, GNorm = 0.2701, lr_0 = 2.6807e-04
Loss = 5.1515e-03, PNorm = 150.0385, GNorm = 0.0814, lr_0 = 2.6788e-04
Loss = 2.8090e-03, PNorm = 150.0457, GNorm = 0.0650, lr_0 = 2.6770e-04
Loss = 3.6237e-03, PNorm = 150.0522, GNorm = 0.2668, lr_0 = 2.6752e-04
Loss = 3.6642e-03, PNorm = 150.0565, GNorm = 0.4888, lr_0 = 2.6733e-04
Loss = 3.2966e-03, PNorm = 150.0631, GNorm = 0.1153, lr_0 = 2.6715e-04
Loss = 3.8092e-03, PNorm = 150.0681, GNorm = 0.2728, lr_0 = 2.6697e-04
Loss = 3.3413e-03, PNorm = 150.0756, GNorm = 0.0920, lr_0 = 2.6678e-04
Loss = 3.0973e-03, PNorm = 150.0793, GNorm = 0.2277, lr_0 = 2.6660e-04
Loss = 3.5164e-03, PNorm = 150.0847, GNorm = 0.1028, lr_0 = 2.6642e-04
Loss = 2.7732e-03, PNorm = 150.0909, GNorm = 0.0549, lr_0 = 2.6624e-04
Loss = 3.7666e-03, PNorm = 150.0975, GNorm = 0.1547, lr_0 = 2.6605e-04
Loss = 3.0620e-03, PNorm = 150.1026, GNorm = 0.1063, lr_0 = 2.6587e-04
Loss = 4.1521e-03, PNorm = 150.1109, GNorm = 0.2254, lr_0 = 2.6569e-04
Loss = 2.8386e-03, PNorm = 150.1165, GNorm = 0.3612, lr_0 = 2.6551e-04
Loss = 4.3893e-03, PNorm = 150.1218, GNorm = 0.2102, lr_0 = 2.6533e-04
Loss = 3.8881e-03, PNorm = 150.1246, GNorm = 0.2213, lr_0 = 2.6514e-04
Loss = 2.9993e-03, PNorm = 150.1316, GNorm = 0.4457, lr_0 = 2.6496e-04
Loss = 3.1043e-03, PNorm = 150.1400, GNorm = 0.1738, lr_0 = 2.6478e-04
Loss = 3.2789e-03, PNorm = 150.1469, GNorm = 0.2795, lr_0 = 2.6460e-04
Loss = 3.3649e-03, PNorm = 150.1553, GNorm = 0.1005, lr_0 = 2.6442e-04
Loss = 2.6974e-03, PNorm = 150.1604, GNorm = 0.1044, lr_0 = 2.6424e-04
Loss = 3.9216e-03, PNorm = 150.1640, GNorm = 0.2238, lr_0 = 2.6406e-04
Loss = 4.6266e-03, PNorm = 150.1710, GNorm = 0.1958, lr_0 = 2.6388e-04
Loss = 2.8035e-03, PNorm = 150.1763, GNorm = 0.0917, lr_0 = 2.6369e-04
Loss = 3.0564e-03, PNorm = 150.1818, GNorm = 0.2008, lr_0 = 2.6351e-04
Loss = 2.7445e-03, PNorm = 150.1874, GNorm = 0.2660, lr_0 = 2.6333e-04
Loss = 3.5450e-03, PNorm = 150.1955, GNorm = 0.2917, lr_0 = 2.6315e-04
Loss = 2.7136e-03, PNorm = 150.2000, GNorm = 0.1842, lr_0 = 2.6297e-04
Loss = 2.9589e-03, PNorm = 150.2055, GNorm = 0.1866, lr_0 = 2.6279e-04
Loss = 2.7801e-03, PNorm = 150.2102, GNorm = 0.1221, lr_0 = 2.6261e-04
Loss = 4.1712e-03, PNorm = 150.2163, GNorm = 0.1986, lr_0 = 2.6243e-04
Loss = 3.6084e-03, PNorm = 150.2254, GNorm = 0.1370, lr_0 = 2.6225e-04
Loss = 3.9669e-03, PNorm = 150.2317, GNorm = 0.1149, lr_0 = 2.6207e-04
Loss = 3.5324e-03, PNorm = 150.2350, GNorm = 0.4946, lr_0 = 2.6189e-04
Loss = 2.8411e-03, PNorm = 150.2420, GNorm = 0.1335, lr_0 = 2.6171e-04
Loss = 4.4963e-03, PNorm = 150.2482, GNorm = 0.1519, lr_0 = 2.6153e-04
Loss = 3.3755e-03, PNorm = 150.2567, GNorm = 0.1958, lr_0 = 2.6136e-04
Loss = 5.0761e-03, PNorm = 150.2656, GNorm = 0.1996, lr_0 = 2.6118e-04
Loss = 3.2002e-03, PNorm = 150.2724, GNorm = 0.2518, lr_0 = 2.6100e-04
Loss = 4.5272e-03, PNorm = 150.2848, GNorm = 0.2721, lr_0 = 2.6082e-04
Loss = 3.6004e-03, PNorm = 150.2919, GNorm = 0.1729, lr_0 = 2.6064e-04
Loss = 4.3811e-03, PNorm = 150.3018, GNorm = 0.1526, lr_0 = 2.6046e-04
Loss = 2.9826e-03, PNorm = 150.3065, GNorm = 0.1714, lr_0 = 2.6028e-04
Loss = 3.0914e-03, PNorm = 150.3114, GNorm = 0.3761, lr_0 = 2.6011e-04
Loss = 2.6910e-03, PNorm = 150.3163, GNorm = 0.2839, lr_0 = 2.5993e-04
Loss = 2.7831e-03, PNorm = 150.3227, GNorm = 0.2172, lr_0 = 2.5975e-04
Loss = 4.0871e-03, PNorm = 150.3282, GNorm = 0.1417, lr_0 = 2.5957e-04
Loss = 3.9270e-03, PNorm = 150.3344, GNorm = 0.1020, lr_0 = 2.5939e-04
Loss = 2.5722e-03, PNorm = 150.3401, GNorm = 0.1397, lr_0 = 2.5922e-04
Loss = 3.7220e-03, PNorm = 150.3441, GNorm = 0.1154, lr_0 = 2.5904e-04
Loss = 3.1649e-03, PNorm = 150.3513, GNorm = 0.1073, lr_0 = 2.5886e-04
Loss = 2.6793e-03, PNorm = 150.3570, GNorm = 0.2580, lr_0 = 2.5868e-04
Loss = 3.1790e-03, PNorm = 150.3665, GNorm = 0.1601, lr_0 = 2.5851e-04
Loss = 3.5914e-03, PNorm = 150.3706, GNorm = 0.3614, lr_0 = 2.5833e-04
Loss = 4.3904e-03, PNorm = 150.3779, GNorm = 0.0535, lr_0 = 2.5815e-04
Loss = 2.8273e-03, PNorm = 150.3822, GNorm = 0.1422, lr_0 = 2.5797e-04
Loss = 3.1809e-03, PNorm = 150.3905, GNorm = 0.2163, lr_0 = 2.5780e-04
Loss = 2.9361e-03, PNorm = 150.3977, GNorm = 0.1162, lr_0 = 2.5762e-04
Loss = 2.6727e-03, PNorm = 150.4028, GNorm = 0.1735, lr_0 = 2.5745e-04
Loss = 2.8441e-03, PNorm = 150.4080, GNorm = 0.5493, lr_0 = 2.5727e-04
Loss = 4.3132e-03, PNorm = 150.4126, GNorm = 0.1940, lr_0 = 2.5709e-04
Loss = 3.1862e-03, PNorm = 150.4231, GNorm = 0.1985, lr_0 = 2.5692e-04
Loss = 2.5546e-03, PNorm = 150.4311, GNorm = 0.1238, lr_0 = 2.5674e-04
Loss = 2.7437e-03, PNorm = 150.4397, GNorm = 0.2359, lr_0 = 2.5656e-04
Loss = 5.7942e-03, PNorm = 150.4507, GNorm = 0.3870, lr_0 = 2.5639e-04
Loss = 3.7424e-03, PNorm = 150.4597, GNorm = 0.3195, lr_0 = 2.5621e-04
Loss = 3.2435e-03, PNorm = 150.4686, GNorm = 0.1994, lr_0 = 2.5604e-04
Loss = 4.1096e-03, PNorm = 150.4735, GNorm = 0.1325, lr_0 = 2.5586e-04
Loss = 3.3675e-03, PNorm = 150.4809, GNorm = 0.1456, lr_0 = 2.5569e-04
Loss = 4.4945e-03, PNorm = 150.4835, GNorm = 0.1103, lr_0 = 2.5551e-04
Loss = 3.1612e-03, PNorm = 150.4855, GNorm = 0.1377, lr_0 = 2.5534e-04
Loss = 3.1150e-03, PNorm = 150.4920, GNorm = 0.0662, lr_0 = 2.5516e-04
Loss = 2.8433e-03, PNorm = 150.5013, GNorm = 0.1701, lr_0 = 2.5499e-04
Loss = 3.6887e-03, PNorm = 150.5073, GNorm = 0.2648, lr_0 = 2.5481e-04
Loss = 2.8416e-03, PNorm = 150.5138, GNorm = 0.0725, lr_0 = 2.5464e-04
Loss = 2.6431e-03, PNorm = 150.5210, GNorm = 0.0671, lr_0 = 2.5446e-04
Loss = 4.2365e-03, PNorm = 150.5296, GNorm = 0.0932, lr_0 = 2.5429e-04
Loss = 3.8293e-03, PNorm = 150.5387, GNorm = 0.1315, lr_0 = 2.5411e-04
Loss = 3.1653e-03, PNorm = 150.5475, GNorm = 0.1308, lr_0 = 2.5394e-04
Loss = 2.3160e-03, PNorm = 150.5534, GNorm = 0.1395, lr_0 = 2.5377e-04
Loss = 3.1137e-03, PNorm = 150.5579, GNorm = 0.1479, lr_0 = 2.5359e-04
Loss = 3.0329e-03, PNorm = 150.5641, GNorm = 0.1813, lr_0 = 2.5342e-04
Loss = 3.9470e-03, PNorm = 150.5704, GNorm = 0.1171, lr_0 = 2.5325e-04
Loss = 4.0310e-03, PNorm = 150.5766, GNorm = 0.1993, lr_0 = 2.5307e-04
Loss = 2.6208e-03, PNorm = 150.5836, GNorm = 0.1914, lr_0 = 2.5290e-04
Loss = 2.8538e-03, PNorm = 150.5910, GNorm = 0.0666, lr_0 = 2.5273e-04
Loss = 2.2568e-03, PNorm = 150.5971, GNorm = 0.1539, lr_0 = 2.5255e-04
Loss = 3.3320e-03, PNorm = 150.6042, GNorm = 0.1409, lr_0 = 2.5238e-04
Loss = 2.5621e-03, PNorm = 150.6096, GNorm = 0.0835, lr_0 = 2.5221e-04
Loss = 2.4982e-03, PNorm = 150.6174, GNorm = 0.1673, lr_0 = 2.5203e-04
Loss = 3.8822e-03, PNorm = 150.6244, GNorm = 0.5124, lr_0 = 2.5186e-04
Loss = 3.3287e-03, PNorm = 150.6299, GNorm = 0.0982, lr_0 = 2.5169e-04
Loss = 2.9628e-03, PNorm = 150.6349, GNorm = 0.3049, lr_0 = 2.5152e-04
Loss = 3.2182e-03, PNorm = 150.6382, GNorm = 0.1281, lr_0 = 2.5134e-04
Loss = 3.0000e-03, PNorm = 150.6435, GNorm = 0.1591, lr_0 = 2.5117e-04
Loss = 4.2478e-03, PNorm = 150.6488, GNorm = 0.1456, lr_0 = 2.5100e-04
Loss = 2.7655e-03, PNorm = 150.6541, GNorm = 0.0583, lr_0 = 2.5083e-04
Loss = 2.7901e-03, PNorm = 150.6619, GNorm = 0.1169, lr_0 = 2.5066e-04
Loss = 3.2155e-03, PNorm = 150.6689, GNorm = 0.0989, lr_0 = 2.5048e-04
Loss = 4.3006e-03, PNorm = 150.6741, GNorm = 0.1766, lr_0 = 2.5031e-04
Loss = 3.7132e-03, PNorm = 150.6784, GNorm = 0.3118, lr_0 = 2.5014e-04
Loss = 4.7549e-03, PNorm = 150.6872, GNorm = 0.1260, lr_0 = 2.4997e-04
Loss = 3.4870e-03, PNorm = 150.6966, GNorm = 0.2319, lr_0 = 2.4980e-04
Loss = 4.0398e-03, PNorm = 150.7059, GNorm = 0.0993, lr_0 = 2.4963e-04
Loss = 5.9012e-03, PNorm = 150.7154, GNorm = 0.1523, lr_0 = 2.4946e-04
Loss = 3.5564e-03, PNorm = 150.7217, GNorm = 0.1604, lr_0 = 2.4929e-04
Loss = 2.6694e-03, PNorm = 150.7285, GNorm = 0.2905, lr_0 = 2.4911e-04
Loss = 4.2903e-03, PNorm = 150.7345, GNorm = 0.1688, lr_0 = 2.4894e-04
Loss = 3.1603e-03, PNorm = 150.7400, GNorm = 0.0882, lr_0 = 2.4877e-04
Loss = 5.0392e-03, PNorm = 150.7462, GNorm = 0.7488, lr_0 = 2.4860e-04
Loss = 3.2813e-03, PNorm = 150.7533, GNorm = 0.1027, lr_0 = 2.4843e-04
Loss = 2.9503e-03, PNorm = 150.7582, GNorm = 0.0883, lr_0 = 2.4826e-04
Loss = 4.1935e-03, PNorm = 150.7666, GNorm = 0.1772, lr_0 = 2.4809e-04
Loss = 2.5803e-03, PNorm = 150.7750, GNorm = 0.1740, lr_0 = 2.4792e-04
Loss = 4.1574e-03, PNorm = 150.7842, GNorm = 0.2457, lr_0 = 2.4775e-04
Loss = 3.3571e-03, PNorm = 150.7931, GNorm = 0.0745, lr_0 = 2.4758e-04
Loss = 4.6555e-03, PNorm = 150.8043, GNorm = 0.0845, lr_0 = 2.4741e-04
Loss = 4.1755e-03, PNorm = 150.8154, GNorm = 0.1098, lr_0 = 2.4724e-04
Loss = 2.3837e-03, PNorm = 150.8219, GNorm = 0.2001, lr_0 = 2.4707e-04
Validation mae = 0.477144
Epoch 19
Loss = 2.5197e-03, PNorm = 150.8234, GNorm = 0.1700, lr_0 = 2.4690e-04
Loss = 2.5397e-03, PNorm = 150.8228, GNorm = 0.0456, lr_0 = 2.4674e-04
Loss = 2.5356e-03, PNorm = 150.8227, GNorm = 0.0925, lr_0 = 2.4657e-04
Loss = 3.4298e-03, PNorm = 150.8258, GNorm = 0.0597, lr_0 = 2.4640e-04
Loss = 2.7547e-03, PNorm = 150.8308, GNorm = 0.0804, lr_0 = 2.4623e-04
Loss = 2.5335e-03, PNorm = 150.8320, GNorm = 0.3020, lr_0 = 2.4606e-04
Loss = 3.6140e-03, PNorm = 150.8347, GNorm = 0.1631, lr_0 = 2.4589e-04
Loss = 3.3219e-03, PNorm = 150.8376, GNorm = 0.1784, lr_0 = 2.4572e-04
Loss = 2.8983e-03, PNorm = 150.8420, GNorm = 0.0867, lr_0 = 2.4556e-04
Loss = 4.5138e-03, PNorm = 150.8482, GNorm = 0.1683, lr_0 = 2.4539e-04
Loss = 2.5981e-03, PNorm = 150.8529, GNorm = 0.2580, lr_0 = 2.4522e-04
Loss = 2.8242e-03, PNorm = 150.8590, GNorm = 0.1492, lr_0 = 2.4505e-04
Loss = 2.4057e-03, PNorm = 150.8634, GNorm = 0.1038, lr_0 = 2.4488e-04
Loss = 2.7102e-03, PNorm = 150.8666, GNorm = 0.0829, lr_0 = 2.4472e-04
Loss = 2.5251e-03, PNorm = 150.8677, GNorm = 0.1194, lr_0 = 2.4455e-04
Loss = 2.1823e-03, PNorm = 150.8724, GNorm = 0.0716, lr_0 = 2.4438e-04
Loss = 2.0712e-03, PNorm = 150.8766, GNorm = 0.1580, lr_0 = 2.4421e-04
Loss = 2.2575e-03, PNorm = 150.8806, GNorm = 0.0754, lr_0 = 2.4405e-04
Loss = 2.8560e-03, PNorm = 150.8842, GNorm = 0.1961, lr_0 = 2.4388e-04
Loss = 2.5903e-03, PNorm = 150.8904, GNorm = 0.1258, lr_0 = 2.4371e-04
Loss = 2.0516e-03, PNorm = 150.8969, GNorm = 0.1335, lr_0 = 2.4354e-04
Loss = 4.0127e-03, PNorm = 150.9022, GNorm = 0.1257, lr_0 = 2.4338e-04
Loss = 2.1969e-03, PNorm = 150.9093, GNorm = 0.0555, lr_0 = 2.4321e-04
Loss = 2.6004e-03, PNorm = 150.9169, GNorm = 0.2133, lr_0 = 2.4304e-04
Loss = 2.8811e-03, PNorm = 150.9201, GNorm = 0.1442, lr_0 = 2.4288e-04
Loss = 2.2199e-03, PNorm = 150.9254, GNorm = 0.0807, lr_0 = 2.4271e-04
Loss = 2.4004e-03, PNorm = 150.9311, GNorm = 0.0593, lr_0 = 2.4254e-04
Loss = 2.6134e-03, PNorm = 150.9375, GNorm = 0.1633, lr_0 = 2.4238e-04
Loss = 2.2459e-03, PNorm = 150.9406, GNorm = 0.0485, lr_0 = 2.4221e-04
Loss = 3.6502e-03, PNorm = 150.9455, GNorm = 0.1053, lr_0 = 2.4205e-04
Loss = 2.2694e-03, PNorm = 150.9516, GNorm = 0.0842, lr_0 = 2.4188e-04
Loss = 2.2715e-03, PNorm = 150.9593, GNorm = 0.1533, lr_0 = 2.4171e-04
Loss = 3.7493e-03, PNorm = 150.9667, GNorm = 0.1017, lr_0 = 2.4155e-04
Loss = 2.2992e-03, PNorm = 150.9724, GNorm = 0.1588, lr_0 = 2.4138e-04
Loss = 3.1336e-03, PNorm = 150.9793, GNorm = 0.1528, lr_0 = 2.4122e-04
Loss = 3.2357e-03, PNorm = 150.9899, GNorm = 0.3677, lr_0 = 2.4105e-04
Loss = 2.3006e-03, PNorm = 150.9965, GNorm = 0.0655, lr_0 = 2.4089e-04
Loss = 2.1986e-03, PNorm = 151.0016, GNorm = 0.0921, lr_0 = 2.4072e-04
Loss = 2.5558e-03, PNorm = 151.0060, GNorm = 0.1242, lr_0 = 2.4056e-04
Loss = 3.8381e-03, PNorm = 151.0127, GNorm = 0.0930, lr_0 = 2.4039e-04
Loss = 2.8024e-03, PNorm = 151.0167, GNorm = 0.1216, lr_0 = 2.4023e-04
Loss = 4.5629e-03, PNorm = 151.0229, GNorm = 0.1412, lr_0 = 2.4006e-04
Loss = 2.3535e-03, PNorm = 151.0284, GNorm = 0.1853, lr_0 = 2.3990e-04
Loss = 2.0277e-03, PNorm = 151.0318, GNorm = 0.2541, lr_0 = 2.3974e-04
Loss = 3.1028e-03, PNorm = 151.0386, GNorm = 0.2960, lr_0 = 2.3957e-04
Loss = 2.7746e-03, PNorm = 151.0449, GNorm = 0.0657, lr_0 = 2.3941e-04
Loss = 2.5418e-03, PNorm = 151.0502, GNorm = 0.1010, lr_0 = 2.3924e-04
Loss = 2.7166e-03, PNorm = 151.0545, GNorm = 0.0978, lr_0 = 2.3908e-04
Loss = 2.9487e-03, PNorm = 151.0586, GNorm = 0.1255, lr_0 = 2.3892e-04
Loss = 2.1119e-03, PNorm = 151.0635, GNorm = 0.1494, lr_0 = 2.3875e-04
Loss = 3.0745e-03, PNorm = 151.0688, GNorm = 0.0619, lr_0 = 2.3859e-04
Loss = 2.6414e-03, PNorm = 151.0749, GNorm = 0.1810, lr_0 = 2.3842e-04
Loss = 2.6862e-03, PNorm = 151.0789, GNorm = 0.1732, lr_0 = 2.3826e-04
Loss = 2.6354e-03, PNorm = 151.0829, GNorm = 0.0991, lr_0 = 2.3810e-04
Loss = 3.4614e-03, PNorm = 151.0871, GNorm = 0.2786, lr_0 = 2.3794e-04
Loss = 2.8691e-03, PNorm = 151.0886, GNorm = 0.2007, lr_0 = 2.3777e-04
Loss = 3.8803e-03, PNorm = 151.0916, GNorm = 0.1202, lr_0 = 2.3761e-04
Loss = 2.9355e-03, PNorm = 151.1009, GNorm = 0.2469, lr_0 = 2.3745e-04
Loss = 4.5628e-03, PNorm = 151.1064, GNorm = 0.0735, lr_0 = 2.3728e-04
Loss = 2.9749e-03, PNorm = 151.1119, GNorm = 0.1825, lr_0 = 2.3712e-04
Loss = 2.8480e-03, PNorm = 151.1169, GNorm = 0.1692, lr_0 = 2.3696e-04
Loss = 3.1442e-03, PNorm = 151.1213, GNorm = 0.2659, lr_0 = 2.3680e-04
Loss = 3.1697e-03, PNorm = 151.1271, GNorm = 0.2028, lr_0 = 2.3663e-04
Loss = 4.5340e-03, PNorm = 151.1334, GNorm = 0.2479, lr_0 = 2.3647e-04
Loss = 2.6708e-03, PNorm = 151.1406, GNorm = 0.0852, lr_0 = 2.3631e-04
Loss = 5.7689e-03, PNorm = 151.1477, GNorm = 0.4240, lr_0 = 2.3615e-04
Loss = 2.9033e-03, PNorm = 151.1536, GNorm = 0.0830, lr_0 = 2.3599e-04
Loss = 2.3781e-03, PNorm = 151.1574, GNorm = 0.1949, lr_0 = 2.3582e-04
Loss = 2.3278e-03, PNorm = 151.1630, GNorm = 0.1118, lr_0 = 2.3566e-04
Loss = 3.4695e-03, PNorm = 151.1673, GNorm = 0.3417, lr_0 = 2.3550e-04
Loss = 2.5948e-03, PNorm = 151.1745, GNorm = 0.0721, lr_0 = 2.3534e-04
Loss = 3.0918e-03, PNorm = 151.1806, GNorm = 0.2327, lr_0 = 2.3518e-04
Loss = 3.3171e-03, PNorm = 151.1848, GNorm = 0.0909, lr_0 = 2.3502e-04
Loss = 2.2738e-03, PNorm = 151.1888, GNorm = 0.1402, lr_0 = 2.3486e-04
Loss = 2.1941e-03, PNorm = 151.1932, GNorm = 0.1357, lr_0 = 2.3470e-04
Loss = 2.2668e-03, PNorm = 151.2003, GNorm = 0.2145, lr_0 = 2.3454e-04
Loss = 2.9343e-03, PNorm = 151.2069, GNorm = 0.1115, lr_0 = 2.3437e-04
Loss = 2.6102e-03, PNorm = 151.2134, GNorm = 0.0798, lr_0 = 2.3421e-04
Loss = 2.4103e-03, PNorm = 151.2188, GNorm = 0.0827, lr_0 = 2.3405e-04
Loss = 2.3053e-03, PNorm = 151.2237, GNorm = 0.0723, lr_0 = 2.3389e-04
Loss = 3.2437e-03, PNorm = 151.2255, GNorm = 0.2284, lr_0 = 2.3373e-04
Loss = 2.6993e-03, PNorm = 151.2302, GNorm = 0.0868, lr_0 = 2.3357e-04
Loss = 2.3607e-03, PNorm = 151.2349, GNorm = 0.2625, lr_0 = 2.3341e-04
Loss = 2.6678e-03, PNorm = 151.2438, GNorm = 0.0602, lr_0 = 2.3325e-04
Loss = 2.3141e-03, PNorm = 151.2524, GNorm = 0.1676, lr_0 = 2.3309e-04
Loss = 2.9670e-03, PNorm = 151.2588, GNorm = 0.2082, lr_0 = 2.3293e-04
Loss = 3.3840e-03, PNorm = 151.2697, GNorm = 0.1704, lr_0 = 2.3277e-04
Loss = 3.0953e-03, PNorm = 151.2761, GNorm = 0.1033, lr_0 = 2.3261e-04
Loss = 3.2980e-03, PNorm = 151.2806, GNorm = 0.2001, lr_0 = 2.3246e-04
Loss = 3.2087e-03, PNorm = 151.2810, GNorm = 0.1526, lr_0 = 2.3230e-04
Loss = 2.4760e-03, PNorm = 151.2857, GNorm = 0.1672, lr_0 = 2.3214e-04
Loss = 2.3795e-03, PNorm = 151.2919, GNorm = 0.1667, lr_0 = 2.3198e-04
Loss = 5.2123e-03, PNorm = 151.2990, GNorm = 0.2889, lr_0 = 2.3182e-04
Loss = 2.7698e-03, PNorm = 151.3039, GNorm = 0.1467, lr_0 = 2.3166e-04
Loss = 3.1641e-03, PNorm = 151.3115, GNorm = 0.0536, lr_0 = 2.3150e-04
Loss = 5.4833e-03, PNorm = 151.3173, GNorm = 0.0962, lr_0 = 2.3134e-04
Loss = 3.1995e-03, PNorm = 151.3254, GNorm = 0.1589, lr_0 = 2.3118e-04
Loss = 2.6273e-03, PNorm = 151.3324, GNorm = 0.0575, lr_0 = 2.3103e-04
Loss = 3.4983e-03, PNorm = 151.3393, GNorm = 0.1119, lr_0 = 2.3087e-04
Loss = 5.6069e-03, PNorm = 151.3452, GNorm = 0.1218, lr_0 = 2.3071e-04
Loss = 2.3496e-03, PNorm = 151.3524, GNorm = 0.1213, lr_0 = 2.3055e-04
Loss = 3.4579e-03, PNorm = 151.3604, GNorm = 0.0670, lr_0 = 2.3039e-04
Loss = 3.9135e-03, PNorm = 151.3650, GNorm = 0.1381, lr_0 = 2.3024e-04
Loss = 3.5305e-03, PNorm = 151.3711, GNorm = 0.1048, lr_0 = 2.3008e-04
Loss = 4.0930e-03, PNorm = 151.3782, GNorm = 0.4234, lr_0 = 2.2992e-04
Loss = 3.7902e-03, PNorm = 151.3859, GNorm = 0.1058, lr_0 = 2.2976e-04
Loss = 2.2728e-03, PNorm = 151.3934, GNorm = 0.1867, lr_0 = 2.2961e-04
Loss = 3.6906e-03, PNorm = 151.3997, GNorm = 0.1470, lr_0 = 2.2945e-04
Loss = 3.3536e-03, PNorm = 151.4038, GNorm = 0.2242, lr_0 = 2.2929e-04
Loss = 3.6479e-03, PNorm = 151.4098, GNorm = 0.2526, lr_0 = 2.2913e-04
Loss = 3.0560e-03, PNorm = 151.4131, GNorm = 0.2303, lr_0 = 2.2898e-04
Loss = 2.6540e-03, PNorm = 151.4188, GNorm = 0.1562, lr_0 = 2.2882e-04
Loss = 3.3225e-03, PNorm = 151.4245, GNorm = 0.1760, lr_0 = 2.2866e-04
Loss = 2.9367e-03, PNorm = 151.4311, GNorm = 0.1026, lr_0 = 2.2851e-04
Loss = 3.4256e-03, PNorm = 151.4370, GNorm = 0.2207, lr_0 = 2.2835e-04
Loss = 5.2914e-03, PNorm = 151.4445, GNorm = 0.2050, lr_0 = 2.2819e-04
Loss = 3.4988e-03, PNorm = 151.4512, GNorm = 0.3641, lr_0 = 2.2804e-04
Loss = 2.5664e-03, PNorm = 151.4582, GNorm = 0.2653, lr_0 = 2.2788e-04
Loss = 2.4320e-03, PNorm = 151.4652, GNorm = 0.0905, lr_0 = 2.2773e-04
Loss = 2.7008e-03, PNorm = 151.4694, GNorm = 0.0802, lr_0 = 2.2757e-04
Validation mae = 0.476939
Epoch 20
Loss = 2.1919e-03, PNorm = 151.4746, GNorm = 0.0455, lr_0 = 2.2741e-04
Loss = 3.9185e-03, PNorm = 151.4814, GNorm = 0.1257, lr_0 = 2.2726e-04
Loss = 2.0365e-03, PNorm = 151.4883, GNorm = 0.0682, lr_0 = 2.2710e-04
Loss = 2.2994e-03, PNorm = 151.4923, GNorm = 0.1209, lr_0 = 2.2695e-04
Loss = 1.9364e-03, PNorm = 151.4961, GNorm = 0.0508, lr_0 = 2.2679e-04
Loss = 2.2140e-03, PNorm = 151.5008, GNorm = 0.1129, lr_0 = 2.2664e-04
Loss = 3.4238e-03, PNorm = 151.5055, GNorm = 0.0408, lr_0 = 2.2648e-04
Loss = 3.2021e-03, PNorm = 151.5105, GNorm = 0.1053, lr_0 = 2.2632e-04
Loss = 2.1786e-03, PNorm = 151.5150, GNorm = 0.1280, lr_0 = 2.2617e-04
Loss = 2.4176e-03, PNorm = 151.5193, GNorm = 0.0700, lr_0 = 2.2601e-04
Loss = 3.4519e-03, PNorm = 151.5224, GNorm = 0.2305, lr_0 = 2.2586e-04
Loss = 2.7366e-03, PNorm = 151.5255, GNorm = 0.1848, lr_0 = 2.2571e-04
Loss = 2.3142e-03, PNorm = 151.5284, GNorm = 0.1797, lr_0 = 2.2555e-04
Loss = 4.8207e-03, PNorm = 151.5334, GNorm = 0.5129, lr_0 = 2.2540e-04
Loss = 2.4679e-03, PNorm = 151.5394, GNorm = 0.1023, lr_0 = 2.2524e-04
Loss = 4.1578e-03, PNorm = 151.5437, GNorm = 0.1794, lr_0 = 2.2509e-04
Loss = 2.2082e-03, PNorm = 151.5485, GNorm = 0.1065, lr_0 = 2.2493e-04
Loss = 2.0981e-03, PNorm = 151.5525, GNorm = 0.0720, lr_0 = 2.2478e-04
Loss = 2.6066e-03, PNorm = 151.5537, GNorm = 0.2720, lr_0 = 2.2463e-04
Loss = 3.9112e-03, PNorm = 151.5550, GNorm = 0.0931, lr_0 = 2.2447e-04
Loss = 2.4362e-03, PNorm = 151.5582, GNorm = 0.0828, lr_0 = 2.2432e-04
Loss = 4.3224e-03, PNorm = 151.5634, GNorm = 0.2232, lr_0 = 2.2416e-04
Loss = 3.6307e-03, PNorm = 151.5720, GNorm = 0.0880, lr_0 = 2.2401e-04
Loss = 1.5445e-03, PNorm = 151.5779, GNorm = 0.0727, lr_0 = 2.2386e-04
Loss = 3.5154e-03, PNorm = 151.5809, GNorm = 0.0534, lr_0 = 2.2370e-04
Loss = 2.3545e-03, PNorm = 151.5831, GNorm = 0.1694, lr_0 = 2.2355e-04
Loss = 2.6059e-03, PNorm = 151.5854, GNorm = 0.1067, lr_0 = 2.2340e-04
Loss = 4.0112e-03, PNorm = 151.5874, GNorm = 0.3028, lr_0 = 2.2324e-04
Loss = 2.0741e-03, PNorm = 151.5918, GNorm = 0.1029, lr_0 = 2.2309e-04
Loss = 2.6105e-03, PNorm = 151.5964, GNorm = 0.1010, lr_0 = 2.2294e-04
Loss = 3.5183e-03, PNorm = 151.6002, GNorm = 0.1491, lr_0 = 2.2279e-04
Loss = 3.0417e-03, PNorm = 151.6055, GNorm = 0.1248, lr_0 = 2.2263e-04
Loss = 2.2361e-03, PNorm = 151.6109, GNorm = 0.2903, lr_0 = 2.2248e-04
Loss = 2.9981e-03, PNorm = 151.6137, GNorm = 0.1890, lr_0 = 2.2233e-04
Loss = 2.4509e-03, PNorm = 151.6204, GNorm = 0.2153, lr_0 = 2.2218e-04
Loss = 2.8679e-03, PNorm = 151.6261, GNorm = 0.1617, lr_0 = 2.2202e-04
Loss = 2.1218e-03, PNorm = 151.6301, GNorm = 0.2023, lr_0 = 2.2187e-04
Loss = 2.3866e-03, PNorm = 151.6364, GNorm = 0.2379, lr_0 = 2.2172e-04
Loss = 2.4058e-03, PNorm = 151.6410, GNorm = 0.1952, lr_0 = 2.2157e-04
Loss = 2.2116e-03, PNorm = 151.6484, GNorm = 0.1410, lr_0 = 2.2142e-04
Loss = 1.6739e-03, PNorm = 151.6531, GNorm = 0.1059, lr_0 = 2.2126e-04
Loss = 1.8142e-03, PNorm = 151.6582, GNorm = 0.1181, lr_0 = 2.2111e-04
Loss = 1.8154e-03, PNorm = 151.6630, GNorm = 0.1242, lr_0 = 2.2096e-04
Loss = 2.7563e-03, PNorm = 151.6670, GNorm = 0.1339, lr_0 = 2.2081e-04
Loss = 2.5041e-03, PNorm = 151.6694, GNorm = 0.1126, lr_0 = 2.2066e-04
Loss = 2.0864e-03, PNorm = 151.6757, GNorm = 0.2498, lr_0 = 2.2051e-04
Loss = 2.8953e-03, PNorm = 151.6778, GNorm = 0.2067, lr_0 = 2.2036e-04
Loss = 2.1340e-03, PNorm = 151.6828, GNorm = 0.0981, lr_0 = 2.2021e-04
Loss = 1.8232e-03, PNorm = 151.6893, GNorm = 0.0914, lr_0 = 2.2005e-04
Loss = 2.7136e-03, PNorm = 151.6938, GNorm = 0.0662, lr_0 = 2.1990e-04
Loss = 2.1713e-03, PNorm = 151.6960, GNorm = 0.2076, lr_0 = 2.1975e-04
Loss = 2.7583e-03, PNorm = 151.6997, GNorm = 0.0870, lr_0 = 2.1960e-04
Loss = 1.7637e-03, PNorm = 151.7087, GNorm = 0.1611, lr_0 = 2.1945e-04
Loss = 1.9912e-03, PNorm = 151.7174, GNorm = 0.2063, lr_0 = 2.1930e-04
Loss = 2.2270e-03, PNorm = 151.7239, GNorm = 0.1868, lr_0 = 2.1915e-04
Loss = 2.9262e-03, PNorm = 151.7320, GNorm = 0.1676, lr_0 = 2.1900e-04
Loss = 3.4556e-03, PNorm = 151.7382, GNorm = 0.0747, lr_0 = 2.1885e-04
Loss = 1.9530e-03, PNorm = 151.7432, GNorm = 0.2227, lr_0 = 2.1870e-04
Loss = 2.0232e-03, PNorm = 151.7499, GNorm = 0.0641, lr_0 = 2.1855e-04
Loss = 3.2752e-03, PNorm = 151.7585, GNorm = 0.1023, lr_0 = 2.1840e-04
Loss = 2.6827e-03, PNorm = 151.7635, GNorm = 0.0963, lr_0 = 2.1825e-04
Loss = 1.9221e-03, PNorm = 151.7681, GNorm = 0.2496, lr_0 = 2.1810e-04
Loss = 3.5318e-03, PNorm = 151.7703, GNorm = 0.2901, lr_0 = 2.1795e-04
Loss = 2.1273e-03, PNorm = 151.7720, GNorm = 0.0956, lr_0 = 2.1780e-04
Loss = 3.3579e-03, PNorm = 151.7752, GNorm = 0.2969, lr_0 = 2.1765e-04
Loss = 2.1466e-03, PNorm = 151.7840, GNorm = 0.2215, lr_0 = 2.1751e-04
Loss = 3.7772e-03, PNorm = 151.7884, GNorm = 0.1224, lr_0 = 2.1736e-04
Loss = 2.1896e-03, PNorm = 151.7939, GNorm = 0.0782, lr_0 = 2.1721e-04
Loss = 1.8640e-03, PNorm = 151.7990, GNorm = 0.1509, lr_0 = 2.1706e-04
Loss = 2.5894e-03, PNorm = 151.8049, GNorm = 0.0833, lr_0 = 2.1691e-04
Loss = 1.9565e-03, PNorm = 151.8121, GNorm = 0.0769, lr_0 = 2.1676e-04
Loss = 2.4929e-03, PNorm = 151.8158, GNorm = 0.1557, lr_0 = 2.1661e-04
Loss = 6.7722e-03, PNorm = 151.8184, GNorm = 1.1659, lr_0 = 2.1646e-04
Loss = 3.4939e-03, PNorm = 151.8223, GNorm = 0.1798, lr_0 = 2.1632e-04
Loss = 3.0890e-03, PNorm = 151.8285, GNorm = 0.1537, lr_0 = 2.1617e-04
Loss = 2.8271e-03, PNorm = 151.8327, GNorm = 0.3046, lr_0 = 2.1602e-04
Loss = 2.7243e-03, PNorm = 151.8402, GNorm = 0.0972, lr_0 = 2.1587e-04
Loss = 1.9404e-03, PNorm = 151.8458, GNorm = 0.1745, lr_0 = 2.1572e-04
Loss = 2.5202e-03, PNorm = 151.8501, GNorm = 0.1083, lr_0 = 2.1558e-04
Loss = 1.9862e-03, PNorm = 151.8540, GNorm = 0.1252, lr_0 = 2.1543e-04
Loss = 4.0821e-03, PNorm = 151.8601, GNorm = 0.2135, lr_0 = 2.1528e-04
Loss = 1.7868e-03, PNorm = 151.8656, GNorm = 0.1028, lr_0 = 2.1513e-04
Loss = 1.7831e-03, PNorm = 151.8702, GNorm = 0.1499, lr_0 = 2.1499e-04
Loss = 4.8256e-03, PNorm = 151.8729, GNorm = 0.1471, lr_0 = 2.1484e-04
Loss = 2.4753e-03, PNorm = 151.8773, GNorm = 0.0920, lr_0 = 2.1469e-04
Loss = 3.2292e-03, PNorm = 151.8825, GNorm = 0.2156, lr_0 = 2.1454e-04
Loss = 2.3193e-03, PNorm = 151.8896, GNorm = 0.0588, lr_0 = 2.1440e-04
Loss = 2.4318e-03, PNorm = 151.8936, GNorm = 0.0700, lr_0 = 2.1425e-04
Loss = 1.8955e-03, PNorm = 151.9009, GNorm = 0.1967, lr_0 = 2.1410e-04
Loss = 2.7282e-03, PNorm = 151.9077, GNorm = 0.0696, lr_0 = 2.1396e-04
Loss = 3.2650e-03, PNorm = 151.9106, GNorm = 0.1249, lr_0 = 2.1381e-04
Loss = 2.3323e-03, PNorm = 151.9130, GNorm = 0.1165, lr_0 = 2.1366e-04
Loss = 1.6355e-03, PNorm = 151.9168, GNorm = 0.0693, lr_0 = 2.1352e-04
Loss = 2.5056e-03, PNorm = 151.9211, GNorm = 0.1240, lr_0 = 2.1337e-04
Loss = 2.3440e-03, PNorm = 151.9263, GNorm = 0.1036, lr_0 = 2.1323e-04
Loss = 1.9378e-03, PNorm = 151.9322, GNorm = 0.1910, lr_0 = 2.1308e-04
Loss = 3.2707e-03, PNorm = 151.9370, GNorm = 0.0938, lr_0 = 2.1293e-04
Loss = 1.9665e-03, PNorm = 151.9413, GNorm = 0.0431, lr_0 = 2.1279e-04
Loss = 1.9720e-03, PNorm = 151.9433, GNorm = 0.1114, lr_0 = 2.1264e-04
Loss = 2.7459e-03, PNorm = 151.9481, GNorm = 0.2721, lr_0 = 2.1250e-04
Loss = 2.7447e-03, PNorm = 151.9547, GNorm = 0.0883, lr_0 = 2.1235e-04
Loss = 3.3213e-03, PNorm = 151.9617, GNorm = 0.1421, lr_0 = 2.1221e-04
Loss = 3.5474e-03, PNorm = 151.9682, GNorm = 0.1997, lr_0 = 2.1206e-04
Loss = 2.4818e-03, PNorm = 151.9724, GNorm = 0.2249, lr_0 = 2.1191e-04
Loss = 2.7969e-03, PNorm = 151.9757, GNorm = 0.1054, lr_0 = 2.1177e-04
Loss = 2.3367e-03, PNorm = 151.9800, GNorm = 0.1261, lr_0 = 2.1162e-04
Loss = 2.4259e-03, PNorm = 151.9830, GNorm = 0.0889, lr_0 = 2.1148e-04
Loss = 2.8812e-03, PNorm = 151.9869, GNorm = 0.1441, lr_0 = 2.1133e-04
Loss = 2.6033e-03, PNorm = 151.9929, GNorm = 0.2679, lr_0 = 2.1119e-04
Loss = 2.0146e-03, PNorm = 151.9984, GNorm = 0.1541, lr_0 = 2.1104e-04
Loss = 2.0436e-03, PNorm = 152.0056, GNorm = 0.0917, lr_0 = 2.1090e-04
Loss = 1.9457e-03, PNorm = 152.0129, GNorm = 0.0551, lr_0 = 2.1076e-04
Loss = 1.6850e-03, PNorm = 152.0192, GNorm = 0.0372, lr_0 = 2.1061e-04
Loss = 3.7777e-03, PNorm = 152.0240, GNorm = 0.0702, lr_0 = 2.1047e-04
Loss = 3.1606e-03, PNorm = 152.0287, GNorm = 0.1974, lr_0 = 2.1032e-04
Loss = 5.6132e-03, PNorm = 152.0341, GNorm = 0.0610, lr_0 = 2.1018e-04
Loss = 2.5307e-03, PNorm = 152.0384, GNorm = 0.0489, lr_0 = 2.1003e-04
Loss = 2.0703e-03, PNorm = 152.0435, GNorm = 0.0602, lr_0 = 2.0989e-04
Loss = 3.4398e-03, PNorm = 152.0489, GNorm = 0.1361, lr_0 = 2.0975e-04
Loss = 2.6862e-03, PNorm = 152.0557, GNorm = 0.0468, lr_0 = 2.0960e-04
Validation mae = 0.476394
Epoch 21
Loss = 3.0730e-03, PNorm = 152.0599, GNorm = 0.4151, lr_0 = 2.0946e-04
Loss = 2.8093e-03, PNorm = 152.0609, GNorm = 0.0697, lr_0 = 2.0932e-04
Loss = 2.9148e-03, PNorm = 152.0630, GNorm = 0.1422, lr_0 = 2.0917e-04
Loss = 2.3039e-03, PNorm = 152.0664, GNorm = 0.0497, lr_0 = 2.0903e-04
Loss = 2.9206e-03, PNorm = 152.0733, GNorm = 0.0531, lr_0 = 2.0889e-04
Loss = 2.5647e-03, PNorm = 152.0750, GNorm = 0.2141, lr_0 = 2.0874e-04
Loss = 2.0048e-03, PNorm = 152.0758, GNorm = 0.1543, lr_0 = 2.0860e-04
Loss = 1.7031e-03, PNorm = 152.0765, GNorm = 0.1282, lr_0 = 2.0846e-04
Loss = 2.5673e-03, PNorm = 152.0807, GNorm = 0.1485, lr_0 = 2.0831e-04
Loss = 1.7240e-03, PNorm = 152.0848, GNorm = 0.0752, lr_0 = 2.0817e-04
Loss = 1.8619e-03, PNorm = 152.0869, GNorm = 0.0694, lr_0 = 2.0803e-04
Loss = 2.1942e-03, PNorm = 152.0891, GNorm = 0.0455, lr_0 = 2.0789e-04
Loss = 1.9061e-03, PNorm = 152.0933, GNorm = 0.1237, lr_0 = 2.0774e-04
Loss = 3.2299e-03, PNorm = 152.0988, GNorm = 0.1713, lr_0 = 2.0760e-04
Loss = 1.5736e-03, PNorm = 152.1036, GNorm = 0.0892, lr_0 = 2.0746e-04
Loss = 1.6645e-03, PNorm = 152.1070, GNorm = 0.1743, lr_0 = 2.0732e-04
Loss = 1.8647e-03, PNorm = 152.1095, GNorm = 0.1316, lr_0 = 2.0718e-04
Loss = 1.6687e-03, PNorm = 152.1128, GNorm = 0.0616, lr_0 = 2.0703e-04
Loss = 1.6849e-03, PNorm = 152.1151, GNorm = 0.2251, lr_0 = 2.0689e-04
Loss = 1.9149e-03, PNorm = 152.1168, GNorm = 0.1273, lr_0 = 2.0675e-04
Loss = 2.1545e-03, PNorm = 152.1180, GNorm = 0.0698, lr_0 = 2.0661e-04
Loss = 4.2200e-03, PNorm = 152.1196, GNorm = 0.0540, lr_0 = 2.0647e-04
Loss = 2.0402e-03, PNorm = 152.1229, GNorm = 0.0841, lr_0 = 2.0633e-04
Loss = 1.7088e-03, PNorm = 152.1285, GNorm = 0.1100, lr_0 = 2.0618e-04
Loss = 1.8438e-03, PNorm = 152.1349, GNorm = 0.1267, lr_0 = 2.0604e-04
Loss = 2.3249e-03, PNorm = 152.1380, GNorm = 0.2517, lr_0 = 2.0590e-04
Loss = 2.3131e-03, PNorm = 152.1435, GNorm = 0.2903, lr_0 = 2.0576e-04
Loss = 2.1227e-03, PNorm = 152.1472, GNorm = 0.1865, lr_0 = 2.0562e-04
Loss = 2.0164e-03, PNorm = 152.1513, GNorm = 0.2204, lr_0 = 2.0548e-04
Loss = 1.7887e-03, PNorm = 152.1527, GNorm = 0.1445, lr_0 = 2.0534e-04
Loss = 1.6223e-03, PNorm = 152.1591, GNorm = 0.0591, lr_0 = 2.0520e-04
Loss = 3.3726e-03, PNorm = 152.1640, GNorm = 0.1172, lr_0 = 2.0506e-04
Loss = 2.9731e-03, PNorm = 152.1680, GNorm = 0.0820, lr_0 = 2.0492e-04
Loss = 1.4220e-03, PNorm = 152.1709, GNorm = 0.1086, lr_0 = 2.0478e-04
Loss = 2.1953e-03, PNorm = 152.1758, GNorm = 0.0707, lr_0 = 2.0464e-04
Loss = 2.3478e-03, PNorm = 152.1793, GNorm = 0.0936, lr_0 = 2.0450e-04
Loss = 2.2985e-03, PNorm = 152.1838, GNorm = 0.1448, lr_0 = 2.0436e-04
Loss = 2.3417e-03, PNorm = 152.1876, GNorm = 0.0936, lr_0 = 2.0422e-04
Loss = 2.3073e-03, PNorm = 152.1925, GNorm = 0.1698, lr_0 = 2.0408e-04
Loss = 3.1803e-03, PNorm = 152.1960, GNorm = 0.0844, lr_0 = 2.0394e-04
Loss = 1.9721e-03, PNorm = 152.2029, GNorm = 0.1012, lr_0 = 2.0380e-04
Loss = 2.2647e-03, PNorm = 152.2068, GNorm = 0.0652, lr_0 = 2.0366e-04
Loss = 2.3024e-03, PNorm = 152.2090, GNorm = 0.1732, lr_0 = 2.0352e-04
Loss = 1.6331e-03, PNorm = 152.2128, GNorm = 0.1828, lr_0 = 2.0338e-04
Loss = 2.5544e-03, PNorm = 152.2191, GNorm = 0.2041, lr_0 = 2.0324e-04
Loss = 1.5995e-03, PNorm = 152.2230, GNorm = 0.1350, lr_0 = 2.0310e-04
Loss = 3.5324e-03, PNorm = 152.2305, GNorm = 0.2334, lr_0 = 2.0296e-04
Loss = 3.1228e-03, PNorm = 152.2357, GNorm = 0.0955, lr_0 = 2.0282e-04
Loss = 1.9474e-03, PNorm = 152.2392, GNorm = 0.0832, lr_0 = 2.0268e-04
Loss = 2.1601e-03, PNorm = 152.2421, GNorm = 0.2077, lr_0 = 2.0254e-04
Loss = 1.5812e-03, PNorm = 152.2456, GNorm = 0.1708, lr_0 = 2.0240e-04
Loss = 4.9455e-03, PNorm = 152.2471, GNorm = 0.2962, lr_0 = 2.0227e-04
Loss = 4.4396e-03, PNorm = 152.2524, GNorm = 0.1065, lr_0 = 2.0213e-04
Loss = 3.1294e-03, PNorm = 152.2570, GNorm = 0.2860, lr_0 = 2.0199e-04
Loss = 2.2684e-03, PNorm = 152.2627, GNorm = 0.1268, lr_0 = 2.0185e-04
Loss = 2.9780e-03, PNorm = 152.2651, GNorm = 0.0595, lr_0 = 2.0171e-04
Loss = 2.6659e-03, PNorm = 152.2707, GNorm = 0.3260, lr_0 = 2.0157e-04
Loss = 1.9619e-03, PNorm = 152.2760, GNorm = 0.1299, lr_0 = 2.0144e-04
Loss = 2.5501e-03, PNorm = 152.2794, GNorm = 0.1912, lr_0 = 2.0130e-04
Loss = 2.0387e-03, PNorm = 152.2829, GNorm = 0.1390, lr_0 = 2.0116e-04
Loss = 3.4621e-03, PNorm = 152.2874, GNorm = 0.2335, lr_0 = 2.0102e-04
Loss = 2.8901e-03, PNorm = 152.2912, GNorm = 0.1299, lr_0 = 2.0088e-04
Loss = 2.0631e-03, PNorm = 152.2977, GNorm = 0.1498, lr_0 = 2.0075e-04
Loss = 1.9209e-03, PNorm = 152.3000, GNorm = 0.0825, lr_0 = 2.0061e-04
Loss = 3.6539e-03, PNorm = 152.3057, GNorm = 0.0583, lr_0 = 2.0047e-04
Loss = 2.1288e-03, PNorm = 152.3106, GNorm = 0.0765, lr_0 = 2.0033e-04
Loss = 1.4852e-03, PNorm = 152.3160, GNorm = 0.0979, lr_0 = 2.0020e-04
Loss = 1.6061e-03, PNorm = 152.3196, GNorm = 0.2743, lr_0 = 2.0006e-04
Loss = 2.4011e-03, PNorm = 152.3228, GNorm = 0.1030, lr_0 = 1.9992e-04
Loss = 2.6751e-03, PNorm = 152.3264, GNorm = 0.1014, lr_0 = 1.9979e-04
Loss = 2.7947e-03, PNorm = 152.3301, GNorm = 0.1961, lr_0 = 1.9965e-04
Loss = 2.2713e-03, PNorm = 152.3323, GNorm = 0.0633, lr_0 = 1.9951e-04
Loss = 6.0296e-03, PNorm = 152.3329, GNorm = 0.0543, lr_0 = 1.9938e-04
Loss = 5.4048e-03, PNorm = 152.3366, GNorm = 0.1703, lr_0 = 1.9924e-04
Loss = 1.8681e-03, PNorm = 152.3411, GNorm = 0.2958, lr_0 = 1.9910e-04
Loss = 2.8497e-03, PNorm = 152.3479, GNorm = 0.3450, lr_0 = 1.9897e-04
Loss = 1.9662e-03, PNorm = 152.3549, GNorm = 0.3356, lr_0 = 1.9883e-04
Loss = 1.9982e-03, PNorm = 152.3590, GNorm = 0.1269, lr_0 = 1.9869e-04
Loss = 2.5302e-03, PNorm = 152.3621, GNorm = 0.1128, lr_0 = 1.9856e-04
Loss = 2.2471e-03, PNorm = 152.3668, GNorm = 0.0884, lr_0 = 1.9842e-04
Loss = 3.9852e-03, PNorm = 152.3686, GNorm = 0.1660, lr_0 = 1.9829e-04
Loss = 2.1141e-03, PNorm = 152.3720, GNorm = 0.1042, lr_0 = 1.9815e-04
Loss = 2.8162e-03, PNorm = 152.3787, GNorm = 0.0892, lr_0 = 1.9801e-04
Loss = 2.6549e-03, PNorm = 152.3850, GNorm = 0.0449, lr_0 = 1.9788e-04
Loss = 1.4347e-03, PNorm = 152.3895, GNorm = 0.1792, lr_0 = 1.9774e-04
Loss = 1.5953e-03, PNorm = 152.3928, GNorm = 0.2352, lr_0 = 1.9761e-04
Loss = 2.6857e-03, PNorm = 152.3947, GNorm = 0.1844, lr_0 = 1.9747e-04
Loss = 2.3537e-03, PNorm = 152.3976, GNorm = 0.1602, lr_0 = 1.9734e-04
Loss = 2.3591e-03, PNorm = 152.4007, GNorm = 0.1058, lr_0 = 1.9720e-04
Loss = 1.7134e-03, PNorm = 152.4058, GNorm = 0.2284, lr_0 = 1.9707e-04
Loss = 2.7590e-03, PNorm = 152.4101, GNorm = 0.1348, lr_0 = 1.9693e-04
Loss = 2.5955e-03, PNorm = 152.4147, GNorm = 0.0717, lr_0 = 1.9680e-04
Loss = 1.4200e-03, PNorm = 152.4200, GNorm = 0.1286, lr_0 = 1.9666e-04
Loss = 1.9022e-03, PNorm = 152.4279, GNorm = 0.0727, lr_0 = 1.9653e-04
Loss = 1.5142e-03, PNorm = 152.4323, GNorm = 0.0645, lr_0 = 1.9639e-04
Loss = 2.8066e-03, PNorm = 152.4362, GNorm = 0.0845, lr_0 = 1.9626e-04
Loss = 1.7183e-03, PNorm = 152.4381, GNorm = 0.1568, lr_0 = 1.9612e-04
Loss = 1.4489e-03, PNorm = 152.4398, GNorm = 0.1195, lr_0 = 1.9599e-04
Loss = 1.9981e-03, PNorm = 152.4428, GNorm = 0.1585, lr_0 = 1.9585e-04
Loss = 1.8300e-03, PNorm = 152.4456, GNorm = 0.2647, lr_0 = 1.9572e-04
Loss = 1.4157e-03, PNorm = 152.4507, GNorm = 0.1307, lr_0 = 1.9559e-04
Loss = 2.0767e-03, PNorm = 152.4532, GNorm = 0.1891, lr_0 = 1.9545e-04
Loss = 1.5633e-03, PNorm = 152.4549, GNorm = 0.1975, lr_0 = 1.9532e-04
Loss = 1.7117e-03, PNorm = 152.4589, GNorm = 0.1163, lr_0 = 1.9518e-04
Loss = 3.7524e-03, PNorm = 152.4622, GNorm = 0.1305, lr_0 = 1.9505e-04
Loss = 1.7995e-03, PNorm = 152.4666, GNorm = 0.1158, lr_0 = 1.9492e-04
Loss = 2.9468e-03, PNorm = 152.4680, GNorm = 0.1166, lr_0 = 1.9478e-04
Loss = 1.7106e-03, PNorm = 152.4710, GNorm = 0.0673, lr_0 = 1.9465e-04
Loss = 1.6566e-03, PNorm = 152.4741, GNorm = 0.0660, lr_0 = 1.9452e-04
Loss = 1.5047e-03, PNorm = 152.4795, GNorm = 0.0896, lr_0 = 1.9438e-04
Loss = 2.7399e-03, PNorm = 152.4842, GNorm = 0.1110, lr_0 = 1.9425e-04
Loss = 1.7018e-03, PNorm = 152.4886, GNorm = 0.2946, lr_0 = 1.9412e-04
Loss = 2.3985e-03, PNorm = 152.4918, GNorm = 0.1254, lr_0 = 1.9398e-04
Loss = 2.4457e-03, PNorm = 152.4978, GNorm = 0.3005, lr_0 = 1.9385e-04
Loss = 2.0469e-03, PNorm = 152.5003, GNorm = 0.1950, lr_0 = 1.9372e-04
Loss = 2.7546e-03, PNorm = 152.5031, GNorm = 0.2398, lr_0 = 1.9359e-04
Loss = 1.8631e-03, PNorm = 152.5078, GNorm = 0.0590, lr_0 = 1.9345e-04
Loss = 4.4159e-03, PNorm = 152.5092, GNorm = 0.2487, lr_0 = 1.9332e-04
Loss = 2.0733e-03, PNorm = 152.5123, GNorm = 0.1828, lr_0 = 1.9319e-04
Loss = 3.5766e-03, PNorm = 152.5163, GNorm = 0.2311, lr_0 = 1.9306e-04
Validation mae = 0.477824
Epoch 22
Loss = 1.5216e-03, PNorm = 152.5205, GNorm = 0.1161, lr_0 = 1.9292e-04
Loss = 1.6078e-03, PNorm = 152.5237, GNorm = 0.1928, lr_0 = 1.9279e-04
Loss = 1.9434e-03, PNorm = 152.5292, GNorm = 0.1326, lr_0 = 1.9266e-04
Loss = 1.5340e-03, PNorm = 152.5341, GNorm = 0.0927, lr_0 = 1.9253e-04
Loss = 2.5974e-03, PNorm = 152.5392, GNorm = 0.1239, lr_0 = 1.9240e-04
Loss = 1.6341e-03, PNorm = 152.5437, GNorm = 0.0600, lr_0 = 1.9226e-04
Loss = 1.5408e-03, PNorm = 152.5491, GNorm = 0.1359, lr_0 = 1.9213e-04
Loss = 1.5586e-03, PNorm = 152.5518, GNorm = 0.0941, lr_0 = 1.9200e-04
Loss = 1.9530e-03, PNorm = 152.5535, GNorm = 0.1863, lr_0 = 1.9187e-04
Loss = 2.7439e-03, PNorm = 152.5561, GNorm = 0.1518, lr_0 = 1.9174e-04
Loss = 2.1372e-03, PNorm = 152.5576, GNorm = 0.1174, lr_0 = 1.9161e-04
Loss = 1.9769e-03, PNorm = 152.5613, GNorm = 0.2916, lr_0 = 1.9148e-04
Loss = 1.3156e-03, PNorm = 152.5668, GNorm = 0.1734, lr_0 = 1.9134e-04
Loss = 1.9007e-03, PNorm = 152.5723, GNorm = 0.0578, lr_0 = 1.9121e-04
Loss = 1.5776e-03, PNorm = 152.5760, GNorm = 0.1224, lr_0 = 1.9108e-04
Loss = 1.6518e-03, PNorm = 152.5806, GNorm = 0.0485, lr_0 = 1.9095e-04
Loss = 5.0815e-03, PNorm = 152.5823, GNorm = 0.1363, lr_0 = 1.9082e-04
Loss = 1.6130e-03, PNorm = 152.5860, GNorm = 0.0989, lr_0 = 1.9069e-04
Loss = 1.7078e-03, PNorm = 152.5902, GNorm = 0.1771, lr_0 = 1.9056e-04
Loss = 3.1449e-03, PNorm = 152.5960, GNorm = 0.1693, lr_0 = 1.9043e-04
Loss = 1.3815e-03, PNorm = 152.5997, GNorm = 0.0962, lr_0 = 1.9030e-04
Loss = 2.7256e-03, PNorm = 152.6038, GNorm = 0.0881, lr_0 = 1.9017e-04
Loss = 2.3023e-03, PNorm = 152.6081, GNorm = 0.2514, lr_0 = 1.9004e-04
Loss = 1.5498e-03, PNorm = 152.6114, GNorm = 0.1219, lr_0 = 1.8991e-04
Loss = 1.8708e-03, PNorm = 152.6122, GNorm = 0.2496, lr_0 = 1.8978e-04
Loss = 2.3659e-03, PNorm = 152.6169, GNorm = 0.2905, lr_0 = 1.8965e-04
Loss = 2.0182e-03, PNorm = 152.6213, GNorm = 0.1029, lr_0 = 1.8952e-04
Loss = 1.4995e-03, PNorm = 152.6232, GNorm = 0.1596, lr_0 = 1.8939e-04
Loss = 1.5450e-03, PNorm = 152.6259, GNorm = 0.0692, lr_0 = 1.8926e-04
Loss = 3.7614e-03, PNorm = 152.6292, GNorm = 0.2037, lr_0 = 1.8913e-04
Loss = 1.8117e-03, PNorm = 152.6301, GNorm = 0.2487, lr_0 = 1.8900e-04
Loss = 1.8363e-03, PNorm = 152.6314, GNorm = 0.2276, lr_0 = 1.8887e-04
Loss = 1.3210e-03, PNorm = 152.6300, GNorm = 0.0612, lr_0 = 1.8874e-04
Loss = 2.4762e-03, PNorm = 152.6343, GNorm = 0.2194, lr_0 = 1.8861e-04
Loss = 1.7575e-03, PNorm = 152.6376, GNorm = 0.0927, lr_0 = 1.8848e-04
Loss = 2.5177e-03, PNorm = 152.6426, GNorm = 0.1057, lr_0 = 1.8835e-04
Loss = 1.9091e-03, PNorm = 152.6459, GNorm = 0.1159, lr_0 = 1.8822e-04
Loss = 2.8031e-03, PNorm = 152.6462, GNorm = 0.0735, lr_0 = 1.8809e-04
Loss = 3.1947e-03, PNorm = 152.6500, GNorm = 0.4391, lr_0 = 1.8797e-04
Loss = 1.5290e-03, PNorm = 152.6534, GNorm = 0.0403, lr_0 = 1.8784e-04
Loss = 1.3922e-03, PNorm = 152.6568, GNorm = 0.0611, lr_0 = 1.8771e-04
Loss = 1.8071e-03, PNorm = 152.6597, GNorm = 0.2410, lr_0 = 1.8758e-04
Loss = 2.4422e-03, PNorm = 152.6620, GNorm = 0.1370, lr_0 = 1.8745e-04
Loss = 2.8853e-03, PNorm = 152.6658, GNorm = 0.0625, lr_0 = 1.8732e-04
Loss = 1.7064e-03, PNorm = 152.6656, GNorm = 0.1161, lr_0 = 1.8719e-04
Loss = 3.7909e-03, PNorm = 152.6693, GNorm = 0.1798, lr_0 = 1.8707e-04
Loss = 1.2644e-03, PNorm = 152.6709, GNorm = 0.0697, lr_0 = 1.8694e-04
Loss = 1.9380e-03, PNorm = 152.6746, GNorm = 0.0857, lr_0 = 1.8681e-04
Loss = 2.6288e-03, PNorm = 152.6782, GNorm = 0.0985, lr_0 = 1.8668e-04
Loss = 1.4013e-03, PNorm = 152.6812, GNorm = 0.1690, lr_0 = 1.8655e-04
Loss = 1.5860e-03, PNorm = 152.6840, GNorm = 0.1335, lr_0 = 1.8643e-04
Loss = 3.1092e-03, PNorm = 152.6859, GNorm = 0.1157, lr_0 = 1.8630e-04
Loss = 1.7149e-03, PNorm = 152.6901, GNorm = 0.1110, lr_0 = 1.8617e-04
Loss = 1.3448e-03, PNorm = 152.6958, GNorm = 0.0519, lr_0 = 1.8604e-04
Loss = 1.4568e-03, PNorm = 152.7021, GNorm = 0.1536, lr_0 = 1.8592e-04
Loss = 1.3930e-03, PNorm = 152.7076, GNorm = 0.2248, lr_0 = 1.8579e-04
Loss = 3.0198e-03, PNorm = 152.7116, GNorm = 0.3112, lr_0 = 1.8566e-04
Loss = 1.5197e-03, PNorm = 152.7137, GNorm = 0.1109, lr_0 = 1.8553e-04
Loss = 2.0864e-03, PNorm = 152.7160, GNorm = 0.1165, lr_0 = 1.8541e-04
Loss = 2.0489e-03, PNorm = 152.7209, GNorm = 0.1284, lr_0 = 1.8528e-04
Loss = 2.0833e-03, PNorm = 152.7247, GNorm = 0.0947, lr_0 = 1.8515e-04
Loss = 1.8707e-03, PNorm = 152.7292, GNorm = 0.0410, lr_0 = 1.8503e-04
Loss = 3.0822e-03, PNorm = 152.7351, GNorm = 0.1674, lr_0 = 1.8490e-04
Loss = 3.5589e-03, PNorm = 152.7401, GNorm = 0.0489, lr_0 = 1.8477e-04
Loss = 1.4214e-03, PNorm = 152.7426, GNorm = 0.0478, lr_0 = 1.8465e-04
Loss = 1.6784e-03, PNorm = 152.7441, GNorm = 0.2280, lr_0 = 1.8452e-04
Loss = 3.9889e-03, PNorm = 152.7476, GNorm = 0.1538, lr_0 = 1.8439e-04
Loss = 2.0635e-03, PNorm = 152.7494, GNorm = 0.6881, lr_0 = 1.8427e-04
Loss = 2.0857e-03, PNorm = 152.7515, GNorm = 0.1837, lr_0 = 1.8414e-04
Loss = 1.5972e-03, PNorm = 152.7565, GNorm = 0.1343, lr_0 = 1.8401e-04
Loss = 5.3250e-03, PNorm = 152.7613, GNorm = 0.2326, lr_0 = 1.8389e-04
Loss = 2.2102e-03, PNorm = 152.7633, GNorm = 0.0968, lr_0 = 1.8376e-04
Loss = 1.3816e-03, PNorm = 152.7643, GNorm = 0.1752, lr_0 = 1.8364e-04
Loss = 1.4048e-03, PNorm = 152.7669, GNorm = 0.0642, lr_0 = 1.8351e-04
Loss = 1.2578e-03, PNorm = 152.7693, GNorm = 0.1140, lr_0 = 1.8338e-04
Loss = 2.3126e-03, PNorm = 152.7727, GNorm = 0.0432, lr_0 = 1.8326e-04
Loss = 2.6504e-03, PNorm = 152.7752, GNorm = 0.1752, lr_0 = 1.8313e-04
Loss = 2.2819e-03, PNorm = 152.7797, GNorm = 0.2761, lr_0 = 1.8301e-04
Loss = 2.2540e-03, PNorm = 152.7829, GNorm = 0.0792, lr_0 = 1.8288e-04
Loss = 1.4897e-03, PNorm = 152.7886, GNorm = 0.1119, lr_0 = 1.8276e-04
Loss = 1.7143e-03, PNorm = 152.7924, GNorm = 0.2157, lr_0 = 1.8263e-04
Loss = 2.7044e-03, PNorm = 152.7965, GNorm = 0.1294, lr_0 = 1.8251e-04
Loss = 1.9616e-03, PNorm = 152.8012, GNorm = 0.2236, lr_0 = 1.8238e-04
Loss = 2.9768e-03, PNorm = 152.8044, GNorm = 0.3746, lr_0 = 1.8226e-04
Loss = 4.3299e-03, PNorm = 152.8084, GNorm = 0.0888, lr_0 = 1.8213e-04
Loss = 1.8290e-03, PNorm = 152.8120, GNorm = 0.1077, lr_0 = 1.8201e-04
Loss = 2.3868e-03, PNorm = 152.8162, GNorm = 0.2053, lr_0 = 1.8188e-04
Loss = 2.2092e-03, PNorm = 152.8219, GNorm = 0.1653, lr_0 = 1.8176e-04
Loss = 3.9052e-03, PNorm = 152.8268, GNorm = 0.1386, lr_0 = 1.8163e-04
Loss = 2.5191e-03, PNorm = 152.8297, GNorm = 0.2131, lr_0 = 1.8151e-04
Loss = 2.6630e-03, PNorm = 152.8342, GNorm = 0.2364, lr_0 = 1.8138e-04
Loss = 1.9811e-03, PNorm = 152.8363, GNorm = 0.1489, lr_0 = 1.8126e-04
Loss = 2.3482e-03, PNorm = 152.8396, GNorm = 0.0419, lr_0 = 1.8114e-04
Loss = 2.5027e-03, PNorm = 152.8446, GNorm = 0.1847, lr_0 = 1.8101e-04
Loss = 1.4003e-03, PNorm = 152.8480, GNorm = 0.1260, lr_0 = 1.8089e-04
Loss = 1.7677e-03, PNorm = 152.8512, GNorm = 0.2524, lr_0 = 1.8076e-04
Loss = 1.3374e-03, PNorm = 152.8520, GNorm = 0.0814, lr_0 = 1.8064e-04
Loss = 2.5780e-03, PNorm = 152.8570, GNorm = 0.1670, lr_0 = 1.8052e-04
Loss = 1.8302e-03, PNorm = 152.8615, GNorm = 0.2512, lr_0 = 1.8039e-04
Loss = 1.4366e-03, PNorm = 152.8660, GNorm = 0.1583, lr_0 = 1.8027e-04
Loss = 2.1809e-03, PNorm = 152.8684, GNorm = 0.2556, lr_0 = 1.8015e-04
Loss = 1.7286e-03, PNorm = 152.8707, GNorm = 0.0463, lr_0 = 1.8002e-04
Loss = 2.6204e-03, PNorm = 152.8736, GNorm = 0.1839, lr_0 = 1.7990e-04
Loss = 1.7332e-03, PNorm = 152.8770, GNorm = 0.0663, lr_0 = 1.7978e-04
Loss = 1.2497e-03, PNorm = 152.8801, GNorm = 0.0411, lr_0 = 1.7965e-04
Loss = 1.7581e-03, PNorm = 152.8833, GNorm = 0.1365, lr_0 = 1.7953e-04
Loss = 1.5923e-03, PNorm = 152.8849, GNorm = 0.0599, lr_0 = 1.7941e-04
Loss = 1.6141e-03, PNorm = 152.8890, GNorm = 0.1115, lr_0 = 1.7928e-04
Loss = 1.3947e-03, PNorm = 152.8941, GNorm = 0.1021, lr_0 = 1.7916e-04
Loss = 1.5306e-03, PNorm = 152.8994, GNorm = 0.0771, lr_0 = 1.7904e-04
Loss = 3.5444e-03, PNorm = 152.9035, GNorm = 0.0615, lr_0 = 1.7892e-04
Loss = 1.8874e-03, PNorm = 152.9061, GNorm = 0.1963, lr_0 = 1.7879e-04
Loss = 3.4958e-03, PNorm = 152.9104, GNorm = 0.1846, lr_0 = 1.7867e-04
Loss = 1.3670e-03, PNorm = 152.9140, GNorm = 0.1295, lr_0 = 1.7855e-04
Loss = 1.4645e-03, PNorm = 152.9164, GNorm = 0.0842, lr_0 = 1.7843e-04
Loss = 1.9432e-03, PNorm = 152.9197, GNorm = 0.0466, lr_0 = 1.7830e-04
Loss = 2.3404e-03, PNorm = 152.9242, GNorm = 0.0580, lr_0 = 1.7818e-04
Loss = 1.7715e-03, PNorm = 152.9306, GNorm = 0.1129, lr_0 = 1.7806e-04
Loss = 1.3111e-03, PNorm = 152.9344, GNorm = 0.1894, lr_0 = 1.7794e-04
Loss = 2.5567e-03, PNorm = 152.9406, GNorm = 0.1532, lr_0 = 1.7782e-04
Validation mae = 0.475506
Epoch 23
Loss = 2.1891e-03, PNorm = 152.9423, GNorm = 0.2077, lr_0 = 1.7769e-04
Loss = 1.0393e-03, PNorm = 152.9435, GNorm = 0.1025, lr_0 = 1.7757e-04
Loss = 1.1238e-03, PNorm = 152.9449, GNorm = 0.0877, lr_0 = 1.7745e-04
Loss = 1.5886e-03, PNorm = 152.9477, GNorm = 0.1932, lr_0 = 1.7733e-04
Loss = 1.7169e-03, PNorm = 152.9516, GNorm = 0.1165, lr_0 = 1.7721e-04
Loss = 1.9013e-03, PNorm = 152.9522, GNorm = 0.0886, lr_0 = 1.7709e-04
Loss = 2.3760e-03, PNorm = 152.9534, GNorm = 0.1679, lr_0 = 1.7696e-04
Loss = 2.2502e-03, PNorm = 152.9526, GNorm = 0.0866, lr_0 = 1.7684e-04
Loss = 1.7883e-03, PNorm = 152.9553, GNorm = 0.0704, lr_0 = 1.7672e-04
Loss = 1.2142e-03, PNorm = 152.9574, GNorm = 0.0712, lr_0 = 1.7660e-04
Loss = 1.2026e-03, PNorm = 152.9615, GNorm = 0.1488, lr_0 = 1.7648e-04
Loss = 1.2328e-03, PNorm = 152.9619, GNorm = 0.1929, lr_0 = 1.7636e-04
Loss = 1.5110e-03, PNorm = 152.9628, GNorm = 0.0966, lr_0 = 1.7624e-04
Loss = 1.3572e-03, PNorm = 152.9643, GNorm = 0.1551, lr_0 = 1.7612e-04
Loss = 1.1834e-03, PNorm = 152.9664, GNorm = 0.0249, lr_0 = 1.7600e-04
Loss = 1.8274e-03, PNorm = 152.9693, GNorm = 0.2514, lr_0 = 1.7588e-04
Loss = 2.3429e-03, PNorm = 152.9707, GNorm = 0.2087, lr_0 = 1.7576e-04
Loss = 2.3850e-03, PNorm = 152.9729, GNorm = 0.0877, lr_0 = 1.7564e-04
Loss = 1.8182e-03, PNorm = 152.9750, GNorm = 0.1058, lr_0 = 1.7552e-04
Loss = 2.6454e-03, PNorm = 152.9796, GNorm = 0.1133, lr_0 = 1.7540e-04
Loss = 1.9409e-03, PNorm = 152.9845, GNorm = 0.1213, lr_0 = 1.7528e-04
Loss = 1.6885e-03, PNorm = 152.9868, GNorm = 0.1929, lr_0 = 1.7516e-04
Loss = 1.4350e-03, PNorm = 152.9878, GNorm = 0.2288, lr_0 = 1.7504e-04
Loss = 1.3937e-03, PNorm = 152.9895, GNorm = 0.0480, lr_0 = 1.7492e-04
Loss = 1.7978e-03, PNorm = 152.9891, GNorm = 0.1764, lr_0 = 1.7480e-04
Loss = 1.3537e-03, PNorm = 152.9915, GNorm = 0.0772, lr_0 = 1.7468e-04
Loss = 1.6285e-03, PNorm = 152.9941, GNorm = 0.0364, lr_0 = 1.7456e-04
Loss = 1.9901e-03, PNorm = 152.9971, GNorm = 0.0819, lr_0 = 1.7444e-04
Loss = 1.5727e-03, PNorm = 152.9999, GNorm = 0.0815, lr_0 = 1.7432e-04
Loss = 1.0825e-03, PNorm = 153.0034, GNorm = 0.0514, lr_0 = 1.7420e-04
Loss = 1.5159e-03, PNorm = 153.0051, GNorm = 0.0818, lr_0 = 1.7408e-04
Loss = 1.2662e-03, PNorm = 153.0088, GNorm = 0.0701, lr_0 = 1.7396e-04
Loss = 1.9105e-03, PNorm = 153.0134, GNorm = 0.0479, lr_0 = 1.7384e-04
Loss = 1.0244e-03, PNorm = 153.0159, GNorm = 0.0595, lr_0 = 1.7372e-04
Loss = 1.1743e-03, PNorm = 153.0187, GNorm = 0.1057, lr_0 = 1.7360e-04
Loss = 2.4666e-03, PNorm = 153.0196, GNorm = 0.1349, lr_0 = 1.7348e-04
Loss = 1.2829e-03, PNorm = 153.0218, GNorm = 0.0797, lr_0 = 1.7336e-04
Loss = 1.7182e-03, PNorm = 153.0228, GNorm = 0.1067, lr_0 = 1.7325e-04
Loss = 1.3480e-03, PNorm = 153.0262, GNorm = 0.0465, lr_0 = 1.7313e-04
Loss = 1.6430e-03, PNorm = 153.0291, GNorm = 0.0480, lr_0 = 1.7301e-04
Loss = 1.2115e-03, PNorm = 153.0330, GNorm = 0.1481, lr_0 = 1.7289e-04
Loss = 1.5968e-03, PNorm = 153.0359, GNorm = 0.0835, lr_0 = 1.7277e-04
Loss = 1.3841e-03, PNorm = 153.0379, GNorm = 0.1438, lr_0 = 1.7265e-04
Loss = 1.2489e-03, PNorm = 153.0403, GNorm = 0.2189, lr_0 = 1.7253e-04
Loss = 1.3252e-03, PNorm = 153.0421, GNorm = 0.0749, lr_0 = 1.7242e-04
Loss = 2.8631e-03, PNorm = 153.0453, GNorm = 0.1131, lr_0 = 1.7230e-04
Loss = 1.5740e-03, PNorm = 153.0466, GNorm = 0.1475, lr_0 = 1.7218e-04
Loss = 1.1681e-03, PNorm = 153.0489, GNorm = 0.1225, lr_0 = 1.7206e-04
Loss = 1.3438e-03, PNorm = 153.0517, GNorm = 0.1759, lr_0 = 1.7194e-04
Loss = 1.5714e-03, PNorm = 153.0541, GNorm = 0.1280, lr_0 = 1.7183e-04
Loss = 1.9511e-03, PNorm = 153.0579, GNorm = 0.1064, lr_0 = 1.7171e-04
Loss = 3.9178e-03, PNorm = 153.0615, GNorm = 0.1967, lr_0 = 1.7159e-04
Loss = 4.6447e-03, PNorm = 153.0644, GNorm = 0.2983, lr_0 = 1.7147e-04
Loss = 1.8530e-03, PNorm = 153.0673, GNorm = 0.1064, lr_0 = 1.7136e-04
Loss = 1.5048e-03, PNorm = 153.0711, GNorm = 0.1055, lr_0 = 1.7124e-04
Loss = 1.6553e-03, PNorm = 153.0734, GNorm = 0.1115, lr_0 = 1.7112e-04
Loss = 1.0757e-03, PNorm = 153.0766, GNorm = 0.1404, lr_0 = 1.7100e-04
Loss = 1.5743e-03, PNorm = 153.0792, GNorm = 0.1748, lr_0 = 1.7089e-04
Loss = 1.5403e-03, PNorm = 153.0826, GNorm = 0.1321, lr_0 = 1.7077e-04
Loss = 2.3297e-03, PNorm = 153.0872, GNorm = 0.1634, lr_0 = 1.7065e-04
Loss = 1.6089e-03, PNorm = 153.0902, GNorm = 0.0522, lr_0 = 1.7054e-04
Loss = 1.0837e-03, PNorm = 153.0921, GNorm = 0.0404, lr_0 = 1.7042e-04
Loss = 1.1906e-03, PNorm = 153.0947, GNorm = 0.1878, lr_0 = 1.7030e-04
Loss = 1.1580e-03, PNorm = 153.0967, GNorm = 0.0373, lr_0 = 1.7019e-04
Loss = 2.4807e-03, PNorm = 153.0992, GNorm = 0.0732, lr_0 = 1.7007e-04
Loss = 1.0929e-03, PNorm = 153.1020, GNorm = 0.0455, lr_0 = 1.6995e-04
Loss = 1.6282e-03, PNorm = 153.1049, GNorm = 0.0561, lr_0 = 1.6984e-04
Loss = 1.5503e-03, PNorm = 153.1065, GNorm = 0.2213, lr_0 = 1.6972e-04
Loss = 2.1735e-03, PNorm = 153.1096, GNorm = 0.0593, lr_0 = 1.6960e-04
Loss = 1.6048e-03, PNorm = 153.1121, GNorm = 0.0555, lr_0 = 1.6949e-04
Loss = 1.3453e-03, PNorm = 153.1157, GNorm = 0.2144, lr_0 = 1.6937e-04
Loss = 2.5663e-03, PNorm = 153.1200, GNorm = 0.0775, lr_0 = 1.6926e-04
Loss = 3.6967e-03, PNorm = 153.1209, GNorm = 0.0910, lr_0 = 1.6914e-04
Loss = 3.2546e-03, PNorm = 153.1267, GNorm = 0.1486, lr_0 = 1.6902e-04
Loss = 1.7772e-03, PNorm = 153.1310, GNorm = 0.1513, lr_0 = 1.6891e-04
Loss = 9.9950e-04, PNorm = 153.1333, GNorm = 0.1296, lr_0 = 1.6879e-04
Loss = 1.9410e-03, PNorm = 153.1346, GNorm = 0.1793, lr_0 = 1.6868e-04
Loss = 1.7663e-03, PNorm = 153.1355, GNorm = 0.1300, lr_0 = 1.6856e-04
Loss = 1.5482e-03, PNorm = 153.1399, GNorm = 0.1142, lr_0 = 1.6845e-04
Loss = 1.1512e-03, PNorm = 153.1434, GNorm = 0.1127, lr_0 = 1.6833e-04
Loss = 2.7078e-03, PNorm = 153.1441, GNorm = 0.0908, lr_0 = 1.6821e-04
Loss = 1.7008e-03, PNorm = 153.1462, GNorm = 0.1613, lr_0 = 1.6810e-04
Loss = 3.1277e-03, PNorm = 153.1507, GNorm = 0.0851, lr_0 = 1.6798e-04
Loss = 2.0022e-03, PNorm = 153.1548, GNorm = 0.0859, lr_0 = 1.6787e-04
Loss = 1.5463e-03, PNorm = 153.1590, GNorm = 0.0474, lr_0 = 1.6775e-04
Loss = 4.5239e-03, PNorm = 153.1615, GNorm = 0.1830, lr_0 = 1.6764e-04
Loss = 2.3104e-03, PNorm = 153.1658, GNorm = 0.4338, lr_0 = 1.6752e-04
Loss = 3.0105e-03, PNorm = 153.1679, GNorm = 0.3007, lr_0 = 1.6741e-04
Loss = 1.9872e-03, PNorm = 153.1726, GNorm = 0.2167, lr_0 = 1.6729e-04
Loss = 1.3267e-03, PNorm = 153.1766, GNorm = 0.0904, lr_0 = 1.6718e-04
Loss = 1.1431e-03, PNorm = 153.1790, GNorm = 0.1115, lr_0 = 1.6707e-04
Loss = 1.8163e-03, PNorm = 153.1810, GNorm = 0.2650, lr_0 = 1.6695e-04
Loss = 4.1700e-03, PNorm = 153.1816, GNorm = 0.0411, lr_0 = 1.6684e-04
Loss = 2.2010e-03, PNorm = 153.1839, GNorm = 0.1191, lr_0 = 1.6672e-04
Loss = 4.1396e-03, PNorm = 153.1869, GNorm = 0.0464, lr_0 = 1.6661e-04
Loss = 1.9568e-03, PNorm = 153.1893, GNorm = 0.1135, lr_0 = 1.6649e-04
Loss = 1.2634e-03, PNorm = 153.1919, GNorm = 0.0727, lr_0 = 1.6638e-04
Loss = 1.7795e-03, PNorm = 153.1953, GNorm = 0.1290, lr_0 = 1.6627e-04
Loss = 1.2953e-03, PNorm = 153.1987, GNorm = 0.1771, lr_0 = 1.6615e-04
Loss = 1.8528e-03, PNorm = 153.2016, GNorm = 0.0999, lr_0 = 1.6604e-04
Loss = 1.2480e-03, PNorm = 153.2032, GNorm = 0.1681, lr_0 = 1.6592e-04
Loss = 1.9794e-03, PNorm = 153.2063, GNorm = 0.0713, lr_0 = 1.6581e-04
Loss = 1.3722e-03, PNorm = 153.2083, GNorm = 0.0596, lr_0 = 1.6570e-04
Loss = 2.4930e-03, PNorm = 153.2135, GNorm = 0.2908, lr_0 = 1.6558e-04
Loss = 3.2818e-03, PNorm = 153.2175, GNorm = 0.0527, lr_0 = 1.6547e-04
Loss = 2.0689e-03, PNorm = 153.2218, GNorm = 0.0333, lr_0 = 1.6536e-04
Loss = 2.7658e-03, PNorm = 153.2259, GNorm = 0.0662, lr_0 = 1.6524e-04
Loss = 3.4958e-03, PNorm = 153.2283, GNorm = 0.3192, lr_0 = 1.6513e-04
Loss = 1.7165e-03, PNorm = 153.2327, GNorm = 0.1938, lr_0 = 1.6502e-04
Loss = 1.6333e-03, PNorm = 153.2361, GNorm = 0.0500, lr_0 = 1.6490e-04
Loss = 1.3006e-03, PNorm = 153.2394, GNorm = 0.1294, lr_0 = 1.6479e-04
Loss = 1.2066e-03, PNorm = 153.2431, GNorm = 0.2375, lr_0 = 1.6468e-04
Loss = 1.4127e-03, PNorm = 153.2461, GNorm = 0.0534, lr_0 = 1.6457e-04
Loss = 1.7973e-03, PNorm = 153.2496, GNorm = 0.0467, lr_0 = 1.6445e-04
Loss = 1.0857e-03, PNorm = 153.2515, GNorm = 0.1015, lr_0 = 1.6434e-04
Loss = 2.5517e-03, PNorm = 153.2551, GNorm = 0.1523, lr_0 = 1.6423e-04
Loss = 1.6186e-03, PNorm = 153.2564, GNorm = 0.1316, lr_0 = 1.6412e-04
Loss = 1.3288e-03, PNorm = 153.2597, GNorm = 0.3362, lr_0 = 1.6400e-04
Loss = 1.3100e-03, PNorm = 153.2640, GNorm = 0.0979, lr_0 = 1.6389e-04
Loss = 4.6935e-03, PNorm = 153.2656, GNorm = 0.2116, lr_0 = 1.6378e-04
Validation mae = 0.477115
Epoch 24
Loss = 1.2277e-03, PNorm = 153.2698, GNorm = 0.0630, lr_0 = 1.6367e-04
Loss = 9.9537e-04, PNorm = 153.2718, GNorm = 0.1044, lr_0 = 1.6355e-04
Loss = 1.2796e-03, PNorm = 153.2758, GNorm = 0.0692, lr_0 = 1.6344e-04
Loss = 1.3905e-03, PNorm = 153.2791, GNorm = 0.0881, lr_0 = 1.6333e-04
Loss = 1.2275e-03, PNorm = 153.2815, GNorm = 0.0457, lr_0 = 1.6322e-04
Loss = 1.4470e-03, PNorm = 153.2829, GNorm = 0.0261, lr_0 = 1.6311e-04
Loss = 1.3641e-03, PNorm = 153.2857, GNorm = 0.2026, lr_0 = 1.6299e-04
Loss = 1.3605e-03, PNorm = 153.2877, GNorm = 0.0404, lr_0 = 1.6288e-04
Loss = 2.0620e-03, PNorm = 153.2920, GNorm = 0.0785, lr_0 = 1.6277e-04
Loss = 2.0136e-03, PNorm = 153.2934, GNorm = 0.1744, lr_0 = 1.6266e-04
Loss = 9.7443e-04, PNorm = 153.2961, GNorm = 0.0531, lr_0 = 1.6255e-04
Loss = 1.0585e-03, PNorm = 153.2976, GNorm = 0.0951, lr_0 = 1.6244e-04
Loss = 1.0487e-03, PNorm = 153.3000, GNorm = 0.0427, lr_0 = 1.6233e-04
Loss = 1.2001e-03, PNorm = 153.3012, GNorm = 0.1571, lr_0 = 1.6221e-04
Loss = 1.0665e-03, PNorm = 153.3037, GNorm = 0.0551, lr_0 = 1.6210e-04
Loss = 1.1606e-03, PNorm = 153.3052, GNorm = 0.0282, lr_0 = 1.6199e-04
Loss = 1.5526e-03, PNorm = 153.3061, GNorm = 0.1226, lr_0 = 1.6188e-04
Loss = 2.6065e-03, PNorm = 153.3048, GNorm = 0.1370, lr_0 = 1.6177e-04
Loss = 1.0138e-03, PNorm = 153.3058, GNorm = 0.0555, lr_0 = 1.6166e-04
Loss = 1.4219e-03, PNorm = 153.3078, GNorm = 0.0735, lr_0 = 1.6155e-04
Loss = 1.4395e-03, PNorm = 153.3105, GNorm = 0.1403, lr_0 = 1.6144e-04
Loss = 1.5818e-03, PNorm = 153.3119, GNorm = 0.2138, lr_0 = 1.6133e-04
Loss = 2.4658e-03, PNorm = 153.3146, GNorm = 0.1825, lr_0 = 1.6122e-04
Loss = 1.0780e-03, PNorm = 153.3150, GNorm = 0.0563, lr_0 = 1.6111e-04
Loss = 9.6445e-04, PNorm = 153.3169, GNorm = 0.0923, lr_0 = 1.6100e-04
Loss = 1.8356e-03, PNorm = 153.3172, GNorm = 0.0886, lr_0 = 1.6089e-04
Loss = 1.6050e-03, PNorm = 153.3175, GNorm = 0.0350, lr_0 = 1.6078e-04
Loss = 1.9582e-03, PNorm = 153.3179, GNorm = 0.1310, lr_0 = 1.6067e-04
Loss = 1.4898e-03, PNorm = 153.3209, GNorm = 0.1457, lr_0 = 1.6056e-04
Loss = 1.8647e-03, PNorm = 153.3213, GNorm = 0.2405, lr_0 = 1.6045e-04
Loss = 9.8530e-04, PNorm = 153.3240, GNorm = 0.1785, lr_0 = 1.6034e-04
Loss = 1.1601e-03, PNorm = 153.3254, GNorm = 0.0449, lr_0 = 1.6023e-04
Loss = 1.1265e-03, PNorm = 153.3283, GNorm = 0.1611, lr_0 = 1.6012e-04
Loss = 1.0791e-03, PNorm = 153.3310, GNorm = 0.0531, lr_0 = 1.6001e-04
Loss = 1.1577e-03, PNorm = 153.3343, GNorm = 0.1317, lr_0 = 1.5990e-04
Loss = 2.4405e-03, PNorm = 153.3358, GNorm = 0.2354, lr_0 = 1.5979e-04
Loss = 1.3269e-03, PNorm = 153.3386, GNorm = 0.0773, lr_0 = 1.5968e-04
Loss = 1.6971e-03, PNorm = 153.3416, GNorm = 0.0800, lr_0 = 1.5957e-04
Loss = 1.2610e-03, PNorm = 153.3452, GNorm = 0.0665, lr_0 = 1.5946e-04
Loss = 1.8404e-03, PNorm = 153.3489, GNorm = 0.0428, lr_0 = 1.5935e-04
Loss = 3.0592e-03, PNorm = 153.3522, GNorm = 0.0907, lr_0 = 1.5924e-04
Loss = 1.0991e-03, PNorm = 153.3549, GNorm = 0.1079, lr_0 = 1.5913e-04
Loss = 1.9331e-03, PNorm = 153.3588, GNorm = 0.1211, lr_0 = 1.5902e-04
Loss = 2.8724e-03, PNorm = 153.3609, GNorm = 0.0777, lr_0 = 1.5891e-04
Loss = 1.9976e-03, PNorm = 153.3641, GNorm = 0.1393, lr_0 = 1.5880e-04
Loss = 1.7780e-03, PNorm = 153.3695, GNorm = 0.3941, lr_0 = 1.5870e-04
Loss = 2.1552e-03, PNorm = 153.3713, GNorm = 0.0612, lr_0 = 1.5859e-04
Loss = 1.4522e-03, PNorm = 153.3728, GNorm = 0.1377, lr_0 = 1.5848e-04
Loss = 1.6628e-03, PNorm = 153.3759, GNorm = 0.2704, lr_0 = 1.5837e-04
Loss = 1.9780e-03, PNorm = 153.3794, GNorm = 0.1888, lr_0 = 1.5826e-04
Loss = 1.0194e-03, PNorm = 153.3812, GNorm = 0.0595, lr_0 = 1.5815e-04
Loss = 1.8790e-03, PNorm = 153.3856, GNorm = 0.2389, lr_0 = 1.5804e-04
Loss = 1.0075e-03, PNorm = 153.3894, GNorm = 0.1067, lr_0 = 1.5794e-04
Loss = 1.8593e-03, PNorm = 153.3922, GNorm = 0.0725, lr_0 = 1.5783e-04
Loss = 1.0329e-03, PNorm = 153.3945, GNorm = 0.1475, lr_0 = 1.5772e-04
Loss = 1.1753e-03, PNorm = 153.3958, GNorm = 0.0515, lr_0 = 1.5761e-04
Loss = 9.1045e-04, PNorm = 153.3999, GNorm = 0.1038, lr_0 = 1.5750e-04
Loss = 2.3532e-03, PNorm = 153.4047, GNorm = 0.7280, lr_0 = 1.5740e-04
Loss = 1.0338e-03, PNorm = 153.4073, GNorm = 0.0773, lr_0 = 1.5729e-04
Loss = 1.2000e-03, PNorm = 153.4092, GNorm = 0.0736, lr_0 = 1.5718e-04
Loss = 1.1986e-03, PNorm = 153.4110, GNorm = 0.0767, lr_0 = 1.5707e-04
Loss = 1.1623e-03, PNorm = 153.4125, GNorm = 0.1591, lr_0 = 1.5697e-04
Loss = 2.0295e-03, PNorm = 153.4141, GNorm = 0.3709, lr_0 = 1.5686e-04
Loss = 1.9102e-03, PNorm = 153.4165, GNorm = 0.0999, lr_0 = 1.5675e-04
Loss = 2.0221e-03, PNorm = 153.4197, GNorm = 0.1675, lr_0 = 1.5664e-04
Loss = 1.9332e-03, PNorm = 153.4216, GNorm = 0.0710, lr_0 = 1.5654e-04
Loss = 1.8091e-03, PNorm = 153.4231, GNorm = 0.1346, lr_0 = 1.5643e-04
Loss = 2.0769e-03, PNorm = 153.4247, GNorm = 0.1909, lr_0 = 1.5632e-04
Loss = 1.4296e-03, PNorm = 153.4273, GNorm = 0.0860, lr_0 = 1.5621e-04
Loss = 2.6677e-03, PNorm = 153.4290, GNorm = 0.3263, lr_0 = 1.5611e-04
Loss = 1.4951e-03, PNorm = 153.4322, GNorm = 0.1026, lr_0 = 1.5600e-04
Loss = 1.5221e-03, PNorm = 153.4347, GNorm = 0.1584, lr_0 = 1.5589e-04
Loss = 1.4913e-03, PNorm = 153.4356, GNorm = 0.0699, lr_0 = 1.5579e-04
Loss = 1.8251e-03, PNorm = 153.4373, GNorm = 0.0473, lr_0 = 1.5568e-04
Loss = 1.6274e-03, PNorm = 153.4389, GNorm = 0.0358, lr_0 = 1.5557e-04
Loss = 1.0165e-03, PNorm = 153.4431, GNorm = 0.1387, lr_0 = 1.5547e-04
Loss = 2.3681e-03, PNorm = 153.4433, GNorm = 0.0609, lr_0 = 1.5536e-04
Loss = 1.7051e-03, PNorm = 153.4451, GNorm = 0.1106, lr_0 = 1.5525e-04
Loss = 2.0010e-03, PNorm = 153.4476, GNorm = 0.0888, lr_0 = 1.5515e-04
Loss = 2.3564e-03, PNorm = 153.4503, GNorm = 0.2017, lr_0 = 1.5504e-04
Loss = 1.5908e-03, PNorm = 153.4532, GNorm = 0.3501, lr_0 = 1.5493e-04
Loss = 1.0944e-03, PNorm = 153.4578, GNorm = 0.0861, lr_0 = 1.5483e-04
Loss = 5.2058e-03, PNorm = 153.4615, GNorm = 0.0616, lr_0 = 1.5472e-04
Loss = 1.0203e-03, PNorm = 153.4635, GNorm = 0.1271, lr_0 = 1.5462e-04
Loss = 2.1110e-03, PNorm = 153.4659, GNorm = 0.1013, lr_0 = 1.5451e-04
Loss = 1.5853e-03, PNorm = 153.4663, GNorm = 0.2273, lr_0 = 1.5440e-04
Loss = 2.2133e-03, PNorm = 153.4683, GNorm = 0.0879, lr_0 = 1.5430e-04
Loss = 1.1668e-03, PNorm = 153.4709, GNorm = 0.0696, lr_0 = 1.5419e-04
Loss = 1.3118e-03, PNorm = 153.4744, GNorm = 0.0469, lr_0 = 1.5409e-04
Loss = 2.7220e-03, PNorm = 153.4777, GNorm = 0.1026, lr_0 = 1.5398e-04
Loss = 1.4349e-03, PNorm = 153.4806, GNorm = 0.1221, lr_0 = 1.5388e-04
Loss = 1.2831e-03, PNorm = 153.4821, GNorm = 0.0726, lr_0 = 1.5377e-04
Loss = 2.0675e-03, PNorm = 153.4837, GNorm = 0.0981, lr_0 = 1.5367e-04
Loss = 1.3675e-03, PNorm = 153.4880, GNorm = 0.0891, lr_0 = 1.5356e-04
Loss = 2.8706e-03, PNorm = 153.4895, GNorm = 0.0590, lr_0 = 1.5346e-04
Loss = 9.4511e-04, PNorm = 153.4925, GNorm = 0.1467, lr_0 = 1.5335e-04
Loss = 1.4446e-03, PNorm = 153.4966, GNorm = 0.1349, lr_0 = 1.5325e-04
Loss = 1.2248e-03, PNorm = 153.4998, GNorm = 0.0835, lr_0 = 1.5314e-04
Loss = 1.2162e-03, PNorm = 153.5015, GNorm = 0.0845, lr_0 = 1.5304e-04
Loss = 2.1683e-03, PNorm = 153.5033, GNorm = 0.0644, lr_0 = 1.5293e-04
Loss = 3.1870e-03, PNorm = 153.5053, GNorm = 0.0853, lr_0 = 1.5283e-04
Loss = 1.5492e-03, PNorm = 153.5093, GNorm = 0.0494, lr_0 = 1.5272e-04
Loss = 1.0802e-03, PNorm = 153.5120, GNorm = 0.1334, lr_0 = 1.5262e-04
Loss = 1.8350e-03, PNorm = 153.5150, GNorm = 0.1400, lr_0 = 1.5251e-04
Loss = 2.9393e-03, PNorm = 153.5167, GNorm = 0.1251, lr_0 = 1.5241e-04
Loss = 1.3421e-03, PNorm = 153.5180, GNorm = 0.0827, lr_0 = 1.5230e-04
Loss = 1.7932e-03, PNorm = 153.5205, GNorm = 0.0578, lr_0 = 1.5220e-04
Loss = 6.5185e-03, PNorm = 153.5222, GNorm = 0.2803, lr_0 = 1.5209e-04
Loss = 3.6078e-03, PNorm = 153.5253, GNorm = 0.1304, lr_0 = 1.5199e-04
Loss = 9.9098e-04, PNorm = 153.5275, GNorm = 0.0535, lr_0 = 1.5189e-04
Loss = 1.9833e-03, PNorm = 153.5315, GNorm = 0.2170, lr_0 = 1.5178e-04
Loss = 2.0317e-03, PNorm = 153.5355, GNorm = 0.0389, lr_0 = 1.5168e-04
Loss = 1.1070e-03, PNorm = 153.5379, GNorm = 0.0735, lr_0 = 1.5157e-04
Loss = 1.0219e-03, PNorm = 153.5399, GNorm = 0.1706, lr_0 = 1.5147e-04
Loss = 1.8789e-03, PNorm = 153.5408, GNorm = 0.4487, lr_0 = 1.5137e-04
Loss = 1.5742e-03, PNorm = 153.5440, GNorm = 0.1173, lr_0 = 1.5126e-04
Loss = 1.6474e-03, PNorm = 153.5460, GNorm = 0.1260, lr_0 = 1.5116e-04
Loss = 2.2114e-03, PNorm = 153.5483, GNorm = 0.0394, lr_0 = 1.5106e-04
Loss = 9.0870e-04, PNorm = 153.5504, GNorm = 0.0438, lr_0 = 1.5095e-04
Loss = 1.1374e-03, PNorm = 153.5520, GNorm = 0.1406, lr_0 = 1.5085e-04
Validation mae = 0.475888
Epoch 25
Loss = 2.0322e-03, PNorm = 153.5539, GNorm = 0.0786, lr_0 = 1.5075e-04
Loss = 1.4553e-03, PNorm = 153.5554, GNorm = 0.2131, lr_0 = 1.5064e-04
Loss = 7.1824e-04, PNorm = 153.5569, GNorm = 0.0788, lr_0 = 1.5054e-04
Loss = 9.9855e-04, PNorm = 153.5584, GNorm = 0.0681, lr_0 = 1.5044e-04
Loss = 1.8727e-03, PNorm = 153.5596, GNorm = 0.0766, lr_0 = 1.5033e-04
Loss = 3.2527e-03, PNorm = 153.5610, GNorm = 0.1869, lr_0 = 1.5023e-04
Loss = 1.0311e-03, PNorm = 153.5636, GNorm = 0.0609, lr_0 = 1.5013e-04
Loss = 8.6854e-04, PNorm = 153.5668, GNorm = 0.0660, lr_0 = 1.5002e-04
Loss = 1.2629e-03, PNorm = 153.5701, GNorm = 0.0907, lr_0 = 1.4992e-04
Loss = 8.1818e-04, PNorm = 153.5721, GNorm = 0.0791, lr_0 = 1.4982e-04
Loss = 9.1715e-04, PNorm = 153.5747, GNorm = 0.0677, lr_0 = 1.4972e-04
Loss = 2.0725e-03, PNorm = 153.5764, GNorm = 0.2539, lr_0 = 1.4961e-04
Loss = 9.5225e-04, PNorm = 153.5766, GNorm = 0.1180, lr_0 = 1.4951e-04
Loss = 3.3154e-03, PNorm = 153.5787, GNorm = 0.2398, lr_0 = 1.4941e-04
Loss = 1.1356e-03, PNorm = 153.5805, GNorm = 0.0400, lr_0 = 1.4931e-04
Loss = 1.8940e-03, PNorm = 153.5824, GNorm = 0.0694, lr_0 = 1.4920e-04
Loss = 8.4634e-04, PNorm = 153.5836, GNorm = 0.0966, lr_0 = 1.4910e-04
Loss = 1.0187e-03, PNorm = 153.5867, GNorm = 0.0755, lr_0 = 1.4900e-04
Loss = 1.5880e-03, PNorm = 153.5878, GNorm = 0.1839, lr_0 = 1.4890e-04
Loss = 9.0434e-04, PNorm = 153.5893, GNorm = 0.0673, lr_0 = 1.4880e-04
Loss = 1.4746e-03, PNorm = 153.5906, GNorm = 0.0304, lr_0 = 1.4869e-04
Loss = 9.7630e-04, PNorm = 153.5930, GNorm = 0.1249, lr_0 = 1.4859e-04
Loss = 9.6789e-04, PNorm = 153.5955, GNorm = 0.1129, lr_0 = 1.4849e-04
Loss = 1.4533e-03, PNorm = 153.5983, GNorm = 0.0891, lr_0 = 1.4839e-04
Loss = 1.1041e-03, PNorm = 153.6006, GNorm = 0.0662, lr_0 = 1.4829e-04
Loss = 8.3538e-04, PNorm = 153.6016, GNorm = 0.2308, lr_0 = 1.4818e-04
Loss = 7.6798e-04, PNorm = 153.6029, GNorm = 0.0572, lr_0 = 1.4808e-04
Loss = 2.7588e-03, PNorm = 153.6060, GNorm = 0.1781, lr_0 = 1.4798e-04
Loss = 1.6229e-03, PNorm = 153.6070, GNorm = 0.1397, lr_0 = 1.4788e-04
Loss = 8.7102e-04, PNorm = 153.6105, GNorm = 0.0476, lr_0 = 1.4778e-04
Loss = 9.9250e-04, PNorm = 153.6121, GNorm = 0.0923, lr_0 = 1.4768e-04
Loss = 9.2235e-04, PNorm = 153.6129, GNorm = 0.0478, lr_0 = 1.4758e-04
Loss = 2.0295e-03, PNorm = 153.6148, GNorm = 0.2352, lr_0 = 1.4748e-04
Loss = 1.0056e-03, PNorm = 153.6176, GNorm = 0.1216, lr_0 = 1.4737e-04
Loss = 1.2273e-03, PNorm = 153.6192, GNorm = 0.0883, lr_0 = 1.4727e-04
Loss = 1.2391e-03, PNorm = 153.6222, GNorm = 0.0515, lr_0 = 1.4717e-04
Loss = 9.9700e-04, PNorm = 153.6214, GNorm = 0.1286, lr_0 = 1.4707e-04
Loss = 1.3713e-03, PNorm = 153.6208, GNorm = 0.1867, lr_0 = 1.4697e-04
Loss = 7.0519e-04, PNorm = 153.6228, GNorm = 0.2310, lr_0 = 1.4687e-04
Loss = 1.3714e-03, PNorm = 153.6258, GNorm = 0.0447, lr_0 = 1.4677e-04
Loss = 1.1384e-03, PNorm = 153.6279, GNorm = 0.0760, lr_0 = 1.4667e-04
Loss = 1.4353e-03, PNorm = 153.6290, GNorm = 0.0673, lr_0 = 1.4657e-04
Loss = 7.5495e-04, PNorm = 153.6306, GNorm = 0.1016, lr_0 = 1.4647e-04
Loss = 1.1154e-03, PNorm = 153.6323, GNorm = 0.0813, lr_0 = 1.4637e-04
Loss = 1.1226e-03, PNorm = 153.6332, GNorm = 0.0869, lr_0 = 1.4627e-04
Loss = 8.6749e-04, PNorm = 153.6341, GNorm = 0.0534, lr_0 = 1.4617e-04
Loss = 8.5296e-04, PNorm = 153.6351, GNorm = 0.2269, lr_0 = 1.4607e-04
Loss = 9.1813e-04, PNorm = 153.6375, GNorm = 0.0813, lr_0 = 1.4597e-04
Loss = 4.3284e-03, PNorm = 153.6409, GNorm = 0.0780, lr_0 = 1.4587e-04
Loss = 3.8518e-03, PNorm = 153.6434, GNorm = 0.2244, lr_0 = 1.4577e-04
Loss = 1.1196e-03, PNorm = 153.6453, GNorm = 0.1063, lr_0 = 1.4567e-04
Loss = 1.0421e-03, PNorm = 153.6456, GNorm = 0.0390, lr_0 = 1.4557e-04
Loss = 1.6742e-03, PNorm = 153.6474, GNorm = 0.0825, lr_0 = 1.4547e-04
Loss = 1.3252e-03, PNorm = 153.6500, GNorm = 0.0876, lr_0 = 1.4537e-04
Loss = 1.7039e-03, PNorm = 153.6529, GNorm = 0.1002, lr_0 = 1.4527e-04
Loss = 7.2154e-04, PNorm = 153.6545, GNorm = 0.0720, lr_0 = 1.4517e-04
Loss = 1.0949e-03, PNorm = 153.6570, GNorm = 0.0322, lr_0 = 1.4507e-04
Loss = 2.0185e-03, PNorm = 153.6598, GNorm = 0.0539, lr_0 = 1.4497e-04
Loss = 1.5822e-03, PNorm = 153.6617, GNorm = 0.0415, lr_0 = 1.4487e-04
Loss = 4.7012e-03, PNorm = 153.6641, GNorm = 0.0884, lr_0 = 1.4477e-04
Loss = 4.0430e-03, PNorm = 153.6672, GNorm = 0.0895, lr_0 = 1.4467e-04
Loss = 2.1667e-03, PNorm = 153.6699, GNorm = 0.1016, lr_0 = 1.4457e-04
Loss = 1.8810e-03, PNorm = 153.6730, GNorm = 0.0520, lr_0 = 1.4447e-04
Loss = 9.5875e-04, PNorm = 153.6769, GNorm = 0.0703, lr_0 = 1.4438e-04
Loss = 2.0150e-03, PNorm = 153.6807, GNorm = 0.1238, lr_0 = 1.4428e-04
Loss = 8.8942e-04, PNorm = 153.6857, GNorm = 0.0661, lr_0 = 1.4418e-04
Loss = 1.5431e-03, PNorm = 153.6861, GNorm = 0.0916, lr_0 = 1.4408e-04
Loss = 1.2358e-03, PNorm = 153.6896, GNorm = 0.1053, lr_0 = 1.4398e-04
Loss = 8.9408e-04, PNorm = 153.6923, GNorm = 0.0679, lr_0 = 1.4388e-04
Loss = 9.8562e-04, PNorm = 153.6935, GNorm = 0.0500, lr_0 = 1.4378e-04
Loss = 1.8050e-03, PNorm = 153.6950, GNorm = 0.2529, lr_0 = 1.4368e-04
Loss = 1.7932e-03, PNorm = 153.6963, GNorm = 0.0308, lr_0 = 1.4359e-04
Loss = 3.2683e-03, PNorm = 153.6960, GNorm = 0.1113, lr_0 = 1.4349e-04
Loss = 1.0060e-03, PNorm = 153.6963, GNorm = 0.2554, lr_0 = 1.4339e-04
Loss = 1.8632e-03, PNorm = 153.6978, GNorm = 0.1171, lr_0 = 1.4329e-04
Loss = 2.1668e-03, PNorm = 153.7000, GNorm = 0.1359, lr_0 = 1.4319e-04
Loss = 1.8318e-03, PNorm = 153.7034, GNorm = 0.1559, lr_0 = 1.4310e-04
Loss = 4.0727e-03, PNorm = 153.7048, GNorm = 0.1364, lr_0 = 1.4300e-04
Loss = 1.5114e-03, PNorm = 153.7070, GNorm = 0.0973, lr_0 = 1.4290e-04
Loss = 9.3136e-04, PNorm = 153.7099, GNorm = 0.0451, lr_0 = 1.4280e-04
Loss = 1.3792e-03, PNorm = 153.7125, GNorm = 0.0707, lr_0 = 1.4270e-04
Loss = 1.0623e-03, PNorm = 153.7131, GNorm = 0.0209, lr_0 = 1.4261e-04
Loss = 3.5033e-03, PNorm = 153.7150, GNorm = 0.0468, lr_0 = 1.4251e-04
Loss = 7.9890e-04, PNorm = 153.7158, GNorm = 0.1953, lr_0 = 1.4241e-04
Loss = 1.2800e-03, PNorm = 153.7174, GNorm = 0.0883, lr_0 = 1.4231e-04
Loss = 1.7605e-03, PNorm = 153.7184, GNorm = 0.0768, lr_0 = 1.4222e-04
Loss = 1.7467e-03, PNorm = 153.7192, GNorm = 0.0574, lr_0 = 1.4212e-04
Loss = 1.2704e-03, PNorm = 153.7206, GNorm = 0.1127, lr_0 = 1.4202e-04
Loss = 9.2940e-04, PNorm = 153.7219, GNorm = 0.1516, lr_0 = 1.4192e-04
Loss = 2.0180e-03, PNorm = 153.7248, GNorm = 0.1212, lr_0 = 1.4183e-04
Loss = 1.1842e-03, PNorm = 153.7273, GNorm = 0.0578, lr_0 = 1.4173e-04
Loss = 7.1454e-04, PNorm = 153.7293, GNorm = 0.1323, lr_0 = 1.4163e-04
Loss = 1.1518e-03, PNorm = 153.7310, GNorm = 0.0561, lr_0 = 1.4153e-04
Loss = 1.3337e-03, PNorm = 153.7339, GNorm = 0.0364, lr_0 = 1.4144e-04
Loss = 1.5132e-03, PNorm = 153.7376, GNorm = 0.1052, lr_0 = 1.4134e-04
Loss = 8.0577e-04, PNorm = 153.7417, GNorm = 0.1456, lr_0 = 1.4124e-04
Loss = 1.0367e-03, PNorm = 153.7438, GNorm = 0.1217, lr_0 = 1.4115e-04
Loss = 2.5415e-03, PNorm = 153.7449, GNorm = 0.1219, lr_0 = 1.4105e-04
Loss = 1.0535e-03, PNorm = 153.7481, GNorm = 0.1968, lr_0 = 1.4095e-04
Loss = 8.0260e-04, PNorm = 153.7508, GNorm = 0.0530, lr_0 = 1.4086e-04
Loss = 2.3577e-03, PNorm = 153.7543, GNorm = 0.0812, lr_0 = 1.4076e-04
Loss = 1.8747e-03, PNorm = 153.7573, GNorm = 0.1105, lr_0 = 1.4066e-04
Loss = 1.2448e-03, PNorm = 153.7587, GNorm = 0.0437, lr_0 = 1.4057e-04
Loss = 1.4227e-03, PNorm = 153.7600, GNorm = 0.0867, lr_0 = 1.4047e-04
Loss = 9.8573e-04, PNorm = 153.7627, GNorm = 0.0507, lr_0 = 1.4038e-04
Loss = 1.1399e-03, PNorm = 153.7644, GNorm = 0.1330, lr_0 = 1.4028e-04
Loss = 9.7861e-04, PNorm = 153.7675, GNorm = 0.1033, lr_0 = 1.4018e-04
Loss = 1.2130e-03, PNorm = 153.7696, GNorm = 0.0771, lr_0 = 1.4009e-04
Loss = 1.2893e-03, PNorm = 153.7718, GNorm = 0.3208, lr_0 = 1.3999e-04
Loss = 1.7196e-03, PNorm = 153.7718, GNorm = 0.1381, lr_0 = 1.3990e-04
Loss = 7.2059e-04, PNorm = 153.7730, GNorm = 0.0463, lr_0 = 1.3980e-04
Loss = 3.4612e-03, PNorm = 153.7761, GNorm = 0.5827, lr_0 = 1.3970e-04
Loss = 1.5853e-03, PNorm = 153.7779, GNorm = 0.1648, lr_0 = 1.3961e-04
Loss = 2.0030e-03, PNorm = 153.7812, GNorm = 0.1977, lr_0 = 1.3951e-04
Loss = 3.5744e-03, PNorm = 153.7836, GNorm = 0.1501, lr_0 = 1.3942e-04
Loss = 1.0457e-03, PNorm = 153.7859, GNorm = 0.0454, lr_0 = 1.3932e-04
Loss = 1.4340e-03, PNorm = 153.7889, GNorm = 0.1402, lr_0 = 1.3923e-04
Loss = 2.2703e-03, PNorm = 153.7903, GNorm = 0.0803, lr_0 = 1.3913e-04
Loss = 1.4356e-03, PNorm = 153.7925, GNorm = 0.1057, lr_0 = 1.3904e-04
Loss = 7.4431e-04, PNorm = 153.7972, GNorm = 0.0885, lr_0 = 1.3894e-04
Validation mae = 0.476086
Epoch 26
Loss = 8.4167e-04, PNorm = 153.7983, GNorm = 0.0642, lr_0 = 1.3884e-04
Loss = 8.7453e-04, PNorm = 153.7987, GNorm = 0.0700, lr_0 = 1.3875e-04
Loss = 1.2259e-03, PNorm = 153.7997, GNorm = 0.1983, lr_0 = 1.3865e-04
Loss = 1.1894e-03, PNorm = 153.8009, GNorm = 0.0500, lr_0 = 1.3856e-04
Loss = 1.1459e-03, PNorm = 153.8013, GNorm = 0.1315, lr_0 = 1.3846e-04
Loss = 1.1911e-03, PNorm = 153.8036, GNorm = 0.0950, lr_0 = 1.3837e-04
Loss = 8.8640e-04, PNorm = 153.8030, GNorm = 0.0772, lr_0 = 1.3828e-04
Loss = 1.9434e-03, PNorm = 153.8026, GNorm = 0.2235, lr_0 = 1.3818e-04
Loss = 9.1314e-04, PNorm = 153.8039, GNorm = 0.0634, lr_0 = 1.3809e-04
Loss = 1.9997e-03, PNorm = 153.8059, GNorm = 0.1358, lr_0 = 1.3799e-04
Loss = 1.8270e-03, PNorm = 153.8074, GNorm = 0.0879, lr_0 = 1.3790e-04
Loss = 7.4357e-04, PNorm = 153.8096, GNorm = 0.1229, lr_0 = 1.3780e-04
Loss = 1.3408e-03, PNorm = 153.8101, GNorm = 0.0655, lr_0 = 1.3771e-04
Loss = 1.6060e-03, PNorm = 153.8105, GNorm = 0.0506, lr_0 = 1.3761e-04
Loss = 9.4990e-04, PNorm = 153.8124, GNorm = 0.0840, lr_0 = 1.3752e-04
Loss = 6.5711e-04, PNorm = 153.8136, GNorm = 0.0676, lr_0 = 1.3742e-04
Loss = 1.1226e-03, PNorm = 153.8153, GNorm = 0.0895, lr_0 = 1.3733e-04
Loss = 7.1320e-04, PNorm = 153.8161, GNorm = 0.0527, lr_0 = 1.3724e-04
Loss = 1.4324e-03, PNorm = 153.8184, GNorm = 0.1178, lr_0 = 1.3714e-04
Loss = 9.1933e-04, PNorm = 153.8190, GNorm = 0.1136, lr_0 = 1.3705e-04
Loss = 1.0093e-03, PNorm = 153.8203, GNorm = 0.0264, lr_0 = 1.3695e-04
Loss = 1.1408e-03, PNorm = 153.8220, GNorm = 0.0517, lr_0 = 1.3686e-04
Loss = 9.8975e-04, PNorm = 153.8246, GNorm = 0.0681, lr_0 = 1.3677e-04
Loss = 1.9905e-03, PNorm = 153.8253, GNorm = 0.0723, lr_0 = 1.3667e-04
Loss = 2.4632e-03, PNorm = 153.8265, GNorm = 0.5195, lr_0 = 1.3658e-04
Loss = 9.4217e-04, PNorm = 153.8282, GNorm = 0.0657, lr_0 = 1.3649e-04
Loss = 1.1667e-03, PNorm = 153.8298, GNorm = 0.1530, lr_0 = 1.3639e-04
Loss = 1.5919e-03, PNorm = 153.8298, GNorm = 0.0831, lr_0 = 1.3630e-04
Loss = 7.2474e-04, PNorm = 153.8296, GNorm = 0.0863, lr_0 = 1.3621e-04
Loss = 1.1428e-03, PNorm = 153.8316, GNorm = 0.0859, lr_0 = 1.3611e-04
Loss = 2.0638e-03, PNorm = 153.8326, GNorm = 0.1047, lr_0 = 1.3602e-04
Loss = 1.3996e-03, PNorm = 153.8347, GNorm = 0.0388, lr_0 = 1.3593e-04
Loss = 2.5207e-03, PNorm = 153.8365, GNorm = 0.1080, lr_0 = 1.3583e-04
Loss = 1.1481e-03, PNorm = 153.8388, GNorm = 0.1023, lr_0 = 1.3574e-04
Loss = 9.3540e-04, PNorm = 153.8413, GNorm = 0.1312, lr_0 = 1.3565e-04
Loss = 6.8273e-04, PNorm = 153.8436, GNorm = 0.0995, lr_0 = 1.3555e-04
Loss = 1.5264e-03, PNorm = 153.8451, GNorm = 0.0499, lr_0 = 1.3546e-04
Loss = 1.0440e-03, PNorm = 153.8456, GNorm = 0.0516, lr_0 = 1.3537e-04
Loss = 2.2107e-03, PNorm = 153.8466, GNorm = 0.0789, lr_0 = 1.3528e-04
Loss = 1.9401e-03, PNorm = 153.8481, GNorm = 0.0574, lr_0 = 1.3518e-04
Loss = 2.0144e-03, PNorm = 153.8491, GNorm = 0.1665, lr_0 = 1.3509e-04
Loss = 7.8325e-04, PNorm = 153.8520, GNorm = 0.1286, lr_0 = 1.3500e-04
Loss = 6.9164e-04, PNorm = 153.8553, GNorm = 0.2283, lr_0 = 1.3491e-04
Loss = 1.0237e-03, PNorm = 153.8575, GNorm = 0.0575, lr_0 = 1.3481e-04
Loss = 7.3754e-04, PNorm = 153.8580, GNorm = 0.0536, lr_0 = 1.3472e-04
Loss = 7.4580e-04, PNorm = 153.8604, GNorm = 0.0974, lr_0 = 1.3463e-04
Loss = 1.5890e-03, PNorm = 153.8610, GNorm = 0.0852, lr_0 = 1.3454e-04
Loss = 1.8462e-03, PNorm = 153.8614, GNorm = 0.1427, lr_0 = 1.3444e-04
Loss = 1.6293e-03, PNorm = 153.8633, GNorm = 0.0539, lr_0 = 1.3435e-04
Loss = 6.4550e-04, PNorm = 153.8662, GNorm = 0.1161, lr_0 = 1.3426e-04
Loss = 4.6733e-03, PNorm = 153.8713, GNorm = 0.1899, lr_0 = 1.3417e-04
Loss = 8.6869e-04, PNorm = 153.8736, GNorm = 0.0784, lr_0 = 1.3408e-04
Loss = 1.5463e-03, PNorm = 153.8765, GNorm = 0.2307, lr_0 = 1.3398e-04
Loss = 1.1229e-03, PNorm = 153.8777, GNorm = 0.0387, lr_0 = 1.3389e-04
Loss = 2.0940e-03, PNorm = 153.8774, GNorm = 0.0611, lr_0 = 1.3380e-04
Loss = 1.6678e-03, PNorm = 153.8770, GNorm = 0.0453, lr_0 = 1.3371e-04
Loss = 1.0135e-03, PNorm = 153.8786, GNorm = 0.0568, lr_0 = 1.3362e-04
Loss = 1.7944e-03, PNorm = 153.8807, GNorm = 0.0967, lr_0 = 1.3353e-04
Loss = 1.3719e-03, PNorm = 153.8831, GNorm = 0.0717, lr_0 = 1.3343e-04
Loss = 9.5308e-04, PNorm = 153.8850, GNorm = 0.0825, lr_0 = 1.3334e-04
Loss = 4.4048e-03, PNorm = 153.8872, GNorm = 0.0951, lr_0 = 1.3325e-04
Loss = 1.1914e-03, PNorm = 153.8889, GNorm = 0.2145, lr_0 = 1.3316e-04
Loss = 1.1306e-03, PNorm = 153.8911, GNorm = 0.1375, lr_0 = 1.3307e-04
Loss = 9.5953e-04, PNorm = 153.8930, GNorm = 0.0658, lr_0 = 1.3298e-04
Loss = 1.8395e-03, PNorm = 153.8973, GNorm = 0.0667, lr_0 = 1.3289e-04
Loss = 8.0459e-04, PNorm = 153.9001, GNorm = 0.0504, lr_0 = 1.3280e-04
Loss = 8.8455e-04, PNorm = 153.9009, GNorm = 0.0982, lr_0 = 1.3270e-04
Loss = 1.2675e-03, PNorm = 153.9030, GNorm = 0.0730, lr_0 = 1.3261e-04
Loss = 9.0096e-04, PNorm = 153.9039, GNorm = 0.0435, lr_0 = 1.3252e-04
Loss = 8.8510e-04, PNorm = 153.9043, GNorm = 0.1324, lr_0 = 1.3243e-04
Loss = 7.0691e-04, PNorm = 153.9055, GNorm = 0.0424, lr_0 = 1.3234e-04
Loss = 2.6838e-03, PNorm = 153.9079, GNorm = 0.1263, lr_0 = 1.3225e-04
Loss = 1.5502e-03, PNorm = 153.9089, GNorm = 0.0942, lr_0 = 1.3216e-04
Loss = 1.4990e-03, PNorm = 153.9098, GNorm = 0.1586, lr_0 = 1.3207e-04
Loss = 8.8862e-04, PNorm = 153.9121, GNorm = 0.1922, lr_0 = 1.3198e-04
Loss = 9.3471e-04, PNorm = 153.9141, GNorm = 0.0455, lr_0 = 1.3189e-04
Loss = 1.9460e-03, PNorm = 153.9145, GNorm = 0.2912, lr_0 = 1.3180e-04
Loss = 1.2134e-03, PNorm = 153.9172, GNorm = 0.0770, lr_0 = 1.3171e-04
Loss = 1.3982e-03, PNorm = 153.9183, GNorm = 0.0484, lr_0 = 1.3162e-04
Loss = 9.1220e-04, PNorm = 153.9200, GNorm = 0.0629, lr_0 = 1.3153e-04
Loss = 6.7863e-04, PNorm = 153.9225, GNorm = 0.0896, lr_0 = 1.3144e-04
Loss = 8.7685e-04, PNorm = 153.9257, GNorm = 0.0464, lr_0 = 1.3135e-04
Loss = 1.5390e-03, PNorm = 153.9251, GNorm = 0.1019, lr_0 = 1.3126e-04
Loss = 7.3089e-04, PNorm = 153.9271, GNorm = 0.0762, lr_0 = 1.3117e-04
Loss = 1.1251e-03, PNorm = 153.9277, GNorm = 0.0784, lr_0 = 1.3108e-04
Loss = 1.2494e-03, PNorm = 153.9294, GNorm = 0.0704, lr_0 = 1.3099e-04
Loss = 1.4331e-03, PNorm = 153.9302, GNorm = 0.0768, lr_0 = 1.3090e-04
Loss = 1.2323e-03, PNorm = 153.9321, GNorm = 0.2126, lr_0 = 1.3081e-04
Loss = 1.6939e-03, PNorm = 153.9329, GNorm = 0.1388, lr_0 = 1.3072e-04
Loss = 3.8448e-03, PNorm = 153.9356, GNorm = 0.2248, lr_0 = 1.3063e-04
Loss = 1.1601e-03, PNorm = 153.9398, GNorm = 0.0778, lr_0 = 1.3054e-04
Loss = 2.8705e-03, PNorm = 153.9401, GNorm = 0.0579, lr_0 = 1.3045e-04
Loss = 2.0763e-03, PNorm = 153.9408, GNorm = 0.0878, lr_0 = 1.3036e-04
Loss = 1.9533e-03, PNorm = 153.9415, GNorm = 0.3388, lr_0 = 1.3027e-04
Loss = 1.7368e-03, PNorm = 153.9426, GNorm = 0.0719, lr_0 = 1.3018e-04
Loss = 1.4375e-03, PNorm = 153.9468, GNorm = 0.1741, lr_0 = 1.3009e-04
Loss = 1.0443e-03, PNorm = 153.9506, GNorm = 0.0643, lr_0 = 1.3000e-04
Loss = 2.3565e-03, PNorm = 153.9541, GNorm = 0.1075, lr_0 = 1.2992e-04
Loss = 6.1037e-04, PNorm = 153.9556, GNorm = 0.1221, lr_0 = 1.2983e-04
Loss = 1.4642e-03, PNorm = 153.9584, GNorm = 0.1004, lr_0 = 1.2974e-04
Loss = 2.8004e-03, PNorm = 153.9611, GNorm = 0.0993, lr_0 = 1.2965e-04
Loss = 1.0481e-03, PNorm = 153.9643, GNorm = 0.0771, lr_0 = 1.2956e-04
Loss = 8.2080e-04, PNorm = 153.9656, GNorm = 0.1782, lr_0 = 1.2947e-04
Loss = 1.6932e-03, PNorm = 153.9675, GNorm = 0.1497, lr_0 = 1.2938e-04
Loss = 1.6584e-03, PNorm = 153.9669, GNorm = 0.0605, lr_0 = 1.2929e-04
Loss = 6.7288e-04, PNorm = 153.9672, GNorm = 0.0798, lr_0 = 1.2921e-04
Loss = 7.4893e-04, PNorm = 153.9676, GNorm = 0.1543, lr_0 = 1.2912e-04
Loss = 8.3591e-04, PNorm = 153.9692, GNorm = 0.0663, lr_0 = 1.2903e-04
Loss = 1.6814e-03, PNorm = 153.9704, GNorm = 0.2428, lr_0 = 1.2894e-04
Loss = 2.1299e-03, PNorm = 153.9706, GNorm = 0.0384, lr_0 = 1.2885e-04
Loss = 6.8437e-04, PNorm = 153.9734, GNorm = 0.0580, lr_0 = 1.2876e-04
Loss = 7.7418e-04, PNorm = 153.9736, GNorm = 0.0284, lr_0 = 1.2867e-04
Loss = 8.4046e-04, PNorm = 153.9764, GNorm = 0.1341, lr_0 = 1.2859e-04
Loss = 2.3489e-03, PNorm = 153.9786, GNorm = 0.0816, lr_0 = 1.2850e-04
Loss = 2.0499e-03, PNorm = 153.9786, GNorm = 0.1598, lr_0 = 1.2841e-04
Loss = 9.2737e-04, PNorm = 153.9816, GNorm = 0.1153, lr_0 = 1.2832e-04
Loss = 1.7424e-03, PNorm = 153.9830, GNorm = 0.0653, lr_0 = 1.2823e-04
Loss = 8.5832e-04, PNorm = 153.9842, GNorm = 0.0362, lr_0 = 1.2815e-04
Loss = 1.9833e-03, PNorm = 153.9859, GNorm = 0.0450, lr_0 = 1.2806e-04
Loss = 8.5995e-04, PNorm = 153.9883, GNorm = 0.0886, lr_0 = 1.2797e-04
Validation mae = 0.475794
Epoch 27
Loss = 8.1975e-04, PNorm = 153.9906, GNorm = 0.1417, lr_0 = 1.2788e-04
Loss = 5.4978e-04, PNorm = 153.9923, GNorm = 0.1152, lr_0 = 1.2780e-04
Loss = 5.7860e-04, PNorm = 153.9925, GNorm = 0.0722, lr_0 = 1.2771e-04
Loss = 5.8335e-04, PNorm = 153.9943, GNorm = 0.0838, lr_0 = 1.2762e-04
Loss = 1.1872e-03, PNorm = 153.9961, GNorm = 0.0821, lr_0 = 1.2753e-04
Loss = 8.7806e-04, PNorm = 153.9969, GNorm = 0.2468, lr_0 = 1.2745e-04
Loss = 7.0161e-04, PNorm = 153.9990, GNorm = 0.1220, lr_0 = 1.2736e-04
Loss = 8.4763e-04, PNorm = 154.0005, GNorm = 0.0879, lr_0 = 1.2727e-04
Loss = 5.0605e-04, PNorm = 154.0028, GNorm = 0.0730, lr_0 = 1.2718e-04
Loss = 1.7951e-03, PNorm = 154.0045, GNorm = 0.0526, lr_0 = 1.2710e-04
Loss = 6.9859e-04, PNorm = 154.0058, GNorm = 0.0474, lr_0 = 1.2701e-04
Loss = 1.5647e-03, PNorm = 154.0057, GNorm = 0.1351, lr_0 = 1.2692e-04
Loss = 9.5794e-04, PNorm = 154.0069, GNorm = 0.0425, lr_0 = 1.2684e-04
Loss = 7.9666e-04, PNorm = 154.0068, GNorm = 0.0313, lr_0 = 1.2675e-04
Loss = 9.4629e-04, PNorm = 154.0092, GNorm = 0.1791, lr_0 = 1.2666e-04
Loss = 3.3290e-03, PNorm = 154.0101, GNorm = 0.0525, lr_0 = 1.2658e-04
Loss = 1.2523e-03, PNorm = 154.0111, GNorm = 0.1403, lr_0 = 1.2649e-04
Loss = 1.1977e-03, PNorm = 154.0137, GNorm = 0.1859, lr_0 = 1.2640e-04
Loss = 6.2580e-04, PNorm = 154.0151, GNorm = 0.1129, lr_0 = 1.2632e-04
Loss = 1.6396e-03, PNorm = 154.0164, GNorm = 0.5900, lr_0 = 1.2623e-04
Loss = 5.5113e-04, PNorm = 154.0177, GNorm = 0.0797, lr_0 = 1.2614e-04
Loss = 1.4934e-03, PNorm = 154.0187, GNorm = 0.1141, lr_0 = 1.2606e-04
Loss = 8.0337e-04, PNorm = 154.0188, GNorm = 0.0718, lr_0 = 1.2597e-04
Loss = 1.0310e-03, PNorm = 154.0188, GNorm = 0.0923, lr_0 = 1.2588e-04
Loss = 1.8176e-03, PNorm = 154.0177, GNorm = 0.0348, lr_0 = 1.2580e-04
Loss = 5.2585e-04, PNorm = 154.0175, GNorm = 0.0728, lr_0 = 1.2571e-04
Loss = 1.4581e-03, PNorm = 154.0195, GNorm = 0.4798, lr_0 = 1.2563e-04
Loss = 2.2966e-03, PNorm = 154.0208, GNorm = 0.1428, lr_0 = 1.2554e-04
Loss = 1.4554e-03, PNorm = 154.0213, GNorm = 0.0983, lr_0 = 1.2545e-04
Loss = 2.1432e-03, PNorm = 154.0229, GNorm = 0.1721, lr_0 = 1.2537e-04
Loss = 9.3171e-04, PNorm = 154.0249, GNorm = 0.1928, lr_0 = 1.2528e-04
Loss = 6.3177e-04, PNorm = 154.0278, GNorm = 0.0418, lr_0 = 1.2520e-04
Loss = 1.6982e-03, PNorm = 154.0304, GNorm = 0.0981, lr_0 = 1.2511e-04
Loss = 7.2239e-04, PNorm = 154.0322, GNorm = 0.1069, lr_0 = 1.2502e-04
Loss = 9.5418e-04, PNorm = 154.0342, GNorm = 0.0692, lr_0 = 1.2494e-04
Loss = 6.2289e-04, PNorm = 154.0363, GNorm = 0.0855, lr_0 = 1.2485e-04
Loss = 6.7258e-04, PNorm = 154.0377, GNorm = 0.1040, lr_0 = 1.2477e-04
Loss = 1.0226e-03, PNorm = 154.0405, GNorm = 0.0618, lr_0 = 1.2468e-04
Loss = 1.0338e-03, PNorm = 154.0417, GNorm = 0.0393, lr_0 = 1.2460e-04
Loss = 8.8316e-04, PNorm = 154.0413, GNorm = 0.1609, lr_0 = 1.2451e-04
Loss = 6.0419e-04, PNorm = 154.0413, GNorm = 0.0832, lr_0 = 1.2443e-04
Loss = 1.2833e-03, PNorm = 154.0435, GNorm = 0.0596, lr_0 = 1.2434e-04
Loss = 1.5168e-03, PNorm = 154.0449, GNorm = 0.0633, lr_0 = 1.2426e-04
Loss = 2.0327e-03, PNorm = 154.0464, GNorm = 0.2073, lr_0 = 1.2417e-04
Loss = 8.6597e-04, PNorm = 154.0479, GNorm = 0.0423, lr_0 = 1.2409e-04
Loss = 7.8883e-04, PNorm = 154.0499, GNorm = 0.0788, lr_0 = 1.2400e-04
Loss = 3.2790e-03, PNorm = 154.0505, GNorm = 0.1961, lr_0 = 1.2392e-04
Loss = 1.3125e-03, PNorm = 154.0536, GNorm = 0.0528, lr_0 = 1.2383e-04
Loss = 3.6253e-03, PNorm = 154.0545, GNorm = 0.1889, lr_0 = 1.2375e-04
Loss = 9.8946e-04, PNorm = 154.0585, GNorm = 0.0968, lr_0 = 1.2366e-04
Loss = 1.0113e-03, PNorm = 154.0609, GNorm = 0.1425, lr_0 = 1.2358e-04
Loss = 1.0814e-03, PNorm = 154.0621, GNorm = 0.0815, lr_0 = 1.2349e-04
Loss = 7.1587e-04, PNorm = 154.0646, GNorm = 0.0751, lr_0 = 1.2341e-04
Loss = 2.6683e-03, PNorm = 154.0664, GNorm = 0.1881, lr_0 = 1.2332e-04
Loss = 1.5363e-03, PNorm = 154.0675, GNorm = 0.0583, lr_0 = 1.2324e-04
Loss = 7.9226e-04, PNorm = 154.0696, GNorm = 0.0831, lr_0 = 1.2315e-04
Loss = 1.2739e-03, PNorm = 154.0708, GNorm = 0.0988, lr_0 = 1.2307e-04
Loss = 8.7727e-04, PNorm = 154.0722, GNorm = 0.0516, lr_0 = 1.2298e-04
Loss = 7.9356e-04, PNorm = 154.0751, GNorm = 0.0568, lr_0 = 1.2290e-04
Loss = 1.0673e-03, PNorm = 154.0798, GNorm = 0.0220, lr_0 = 1.2282e-04
Loss = 1.7382e-03, PNorm = 154.0809, GNorm = 0.1503, lr_0 = 1.2273e-04
Loss = 3.2354e-03, PNorm = 154.0800, GNorm = 0.1068, lr_0 = 1.2265e-04
Loss = 6.5985e-04, PNorm = 154.0805, GNorm = 0.1826, lr_0 = 1.2256e-04
Loss = 1.6347e-03, PNorm = 154.0826, GNorm = 0.0527, lr_0 = 1.2248e-04
Loss = 1.1931e-03, PNorm = 154.0832, GNorm = 0.0701, lr_0 = 1.2240e-04
Loss = 1.5929e-03, PNorm = 154.0839, GNorm = 0.0470, lr_0 = 1.2231e-04
Loss = 2.1095e-03, PNorm = 154.0851, GNorm = 0.1376, lr_0 = 1.2223e-04
Loss = 1.4346e-03, PNorm = 154.0864, GNorm = 0.0739, lr_0 = 1.2214e-04
Loss = 1.2824e-03, PNorm = 154.0875, GNorm = 0.0966, lr_0 = 1.2206e-04
Loss = 1.2110e-03, PNorm = 154.0893, GNorm = 0.1642, lr_0 = 1.2198e-04
Loss = 6.0934e-04, PNorm = 154.0897, GNorm = 0.0660, lr_0 = 1.2189e-04
Loss = 1.4750e-03, PNorm = 154.0897, GNorm = 0.1255, lr_0 = 1.2181e-04
Loss = 1.4727e-03, PNorm = 154.0909, GNorm = 0.0624, lr_0 = 1.2173e-04
Loss = 2.2460e-03, PNorm = 154.0906, GNorm = 0.1348, lr_0 = 1.2164e-04
Loss = 5.9239e-04, PNorm = 154.0917, GNorm = 0.0552, lr_0 = 1.2156e-04
Loss = 1.0133e-03, PNorm = 154.0949, GNorm = 0.1064, lr_0 = 1.2148e-04
Loss = 2.4217e-03, PNorm = 154.0962, GNorm = 0.0775, lr_0 = 1.2139e-04
Loss = 6.6573e-04, PNorm = 154.0999, GNorm = 0.0637, lr_0 = 1.2131e-04
Loss = 2.0899e-03, PNorm = 154.1026, GNorm = 0.1089, lr_0 = 1.2123e-04
Loss = 5.1710e-04, PNorm = 154.1054, GNorm = 0.0321, lr_0 = 1.2114e-04
Loss = 2.1611e-03, PNorm = 154.1078, GNorm = 0.1280, lr_0 = 1.2106e-04
Loss = 6.0351e-04, PNorm = 154.1108, GNorm = 0.0503, lr_0 = 1.2098e-04
Loss = 8.4859e-04, PNorm = 154.1125, GNorm = 0.0293, lr_0 = 1.2090e-04
Loss = 5.5203e-04, PNorm = 154.1134, GNorm = 0.0500, lr_0 = 1.2081e-04
Loss = 6.6201e-04, PNorm = 154.1134, GNorm = 0.1234, lr_0 = 1.2073e-04
Loss = 1.6853e-03, PNorm = 154.1161, GNorm = 0.1453, lr_0 = 1.2065e-04
Loss = 1.5168e-03, PNorm = 154.1184, GNorm = 0.0867, lr_0 = 1.2056e-04
Loss = 1.3329e-03, PNorm = 154.1202, GNorm = 0.0644, lr_0 = 1.2048e-04
Loss = 1.0990e-03, PNorm = 154.1217, GNorm = 0.0648, lr_0 = 1.2040e-04
Loss = 6.8856e-04, PNorm = 154.1225, GNorm = 0.0496, lr_0 = 1.2032e-04
Loss = 1.6161e-03, PNorm = 154.1232, GNorm = 0.0565, lr_0 = 1.2023e-04
Loss = 1.1107e-03, PNorm = 154.1250, GNorm = 0.0716, lr_0 = 1.2015e-04
Loss = 1.8785e-03, PNorm = 154.1268, GNorm = 0.1199, lr_0 = 1.2007e-04
Loss = 1.0505e-03, PNorm = 154.1282, GNorm = 0.0749, lr_0 = 1.1999e-04
Loss = 6.1644e-04, PNorm = 154.1300, GNorm = 0.0938, lr_0 = 1.1991e-04
Loss = 2.1144e-03, PNorm = 154.1300, GNorm = 0.1681, lr_0 = 1.1982e-04
Loss = 7.5536e-04, PNorm = 154.1317, GNorm = 0.0968, lr_0 = 1.1974e-04
Loss = 6.3071e-04, PNorm = 154.1337, GNorm = 0.0891, lr_0 = 1.1966e-04
Loss = 6.7460e-04, PNorm = 154.1351, GNorm = 0.0315, lr_0 = 1.1958e-04
Loss = 9.4566e-04, PNorm = 154.1367, GNorm = 0.0335, lr_0 = 1.1950e-04
Loss = 1.4896e-03, PNorm = 154.1380, GNorm = 0.0949, lr_0 = 1.1941e-04
Loss = 3.0857e-03, PNorm = 154.1400, GNorm = 0.1182, lr_0 = 1.1933e-04
Loss = 2.6034e-03, PNorm = 154.1420, GNorm = 0.0803, lr_0 = 1.1925e-04
Loss = 2.4710e-03, PNorm = 154.1455, GNorm = 0.1800, lr_0 = 1.1917e-04
Loss = 8.8969e-04, PNorm = 154.1464, GNorm = 0.0424, lr_0 = 1.1909e-04
Loss = 7.3089e-04, PNorm = 154.1464, GNorm = 0.0778, lr_0 = 1.1901e-04
Loss = 3.0522e-03, PNorm = 154.1460, GNorm = 0.1563, lr_0 = 1.1892e-04
Loss = 7.3918e-04, PNorm = 154.1480, GNorm = 0.1106, lr_0 = 1.1884e-04
Loss = 1.4934e-03, PNorm = 154.1499, GNorm = 0.0912, lr_0 = 1.1876e-04
Loss = 1.6043e-03, PNorm = 154.1509, GNorm = 0.0559, lr_0 = 1.1868e-04
Loss = 1.4297e-03, PNorm = 154.1528, GNorm = 0.1183, lr_0 = 1.1860e-04
Loss = 1.2067e-03, PNorm = 154.1553, GNorm = 0.0318, lr_0 = 1.1852e-04
Loss = 1.2510e-03, PNorm = 154.1576, GNorm = 0.0598, lr_0 = 1.1844e-04
Loss = 9.8193e-04, PNorm = 154.1594, GNorm = 0.0558, lr_0 = 1.1835e-04
Loss = 1.1383e-03, PNorm = 154.1592, GNorm = 0.0817, lr_0 = 1.1827e-04
Loss = 7.0713e-04, PNorm = 154.1617, GNorm = 0.0534, lr_0 = 1.1819e-04
Loss = 1.6400e-03, PNorm = 154.1628, GNorm = 0.0766, lr_0 = 1.1811e-04
Loss = 1.7220e-03, PNorm = 154.1648, GNorm = 0.0444, lr_0 = 1.1803e-04
Loss = 1.5086e-03, PNorm = 154.1659, GNorm = 0.0564, lr_0 = 1.1795e-04
Loss = 5.5683e-04, PNorm = 154.1674, GNorm = 0.1450, lr_0 = 1.1787e-04
Validation mae = 0.476420
Epoch 28
Loss = 4.7087e-04, PNorm = 154.1690, GNorm = 0.0447, lr_0 = 1.1779e-04
Loss = 5.9760e-04, PNorm = 154.1700, GNorm = 0.0517, lr_0 = 1.1771e-04
Loss = 8.4601e-04, PNorm = 154.1721, GNorm = 0.1000, lr_0 = 1.1763e-04
Loss = 1.0270e-03, PNorm = 154.1745, GNorm = 0.0803, lr_0 = 1.1755e-04
Loss = 6.6675e-04, PNorm = 154.1742, GNorm = 0.0954, lr_0 = 1.1747e-04
Loss = 1.5544e-03, PNorm = 154.1738, GNorm = 0.0876, lr_0 = 1.1739e-04
Loss = 1.3031e-03, PNorm = 154.1756, GNorm = 0.1576, lr_0 = 1.1730e-04
Loss = 7.1720e-04, PNorm = 154.1757, GNorm = 0.2477, lr_0 = 1.1722e-04
Loss = 6.0082e-04, PNorm = 154.1767, GNorm = 0.0390, lr_0 = 1.1714e-04
Loss = 1.1311e-03, PNorm = 154.1769, GNorm = 0.1886, lr_0 = 1.1706e-04
Loss = 5.5451e-04, PNorm = 154.1764, GNorm = 0.0455, lr_0 = 1.1698e-04
Loss = 9.7272e-04, PNorm = 154.1779, GNorm = 0.0731, lr_0 = 1.1690e-04
Loss = 2.1928e-03, PNorm = 154.1790, GNorm = 0.0326, lr_0 = 1.1682e-04
Loss = 8.6984e-04, PNorm = 154.1801, GNorm = 0.0618, lr_0 = 1.1674e-04
Loss = 8.8028e-04, PNorm = 154.1811, GNorm = 0.0569, lr_0 = 1.1666e-04
Loss = 1.5222e-03, PNorm = 154.1819, GNorm = 0.1508, lr_0 = 1.1658e-04
Loss = 2.7555e-03, PNorm = 154.1825, GNorm = 0.3884, lr_0 = 1.1650e-04
Loss = 1.5805e-03, PNorm = 154.1820, GNorm = 0.1860, lr_0 = 1.1642e-04
Loss = 1.4613e-03, PNorm = 154.1841, GNorm = 0.0812, lr_0 = 1.1634e-04
Loss = 6.5462e-04, PNorm = 154.1851, GNorm = 0.0497, lr_0 = 1.1626e-04
Loss = 1.7311e-03, PNorm = 154.1872, GNorm = 0.1135, lr_0 = 1.1618e-04
Loss = 8.8872e-04, PNorm = 154.1878, GNorm = 0.3280, lr_0 = 1.1611e-04
Loss = 1.7876e-03, PNorm = 154.1881, GNorm = 0.3060, lr_0 = 1.1603e-04
Loss = 1.0216e-03, PNorm = 154.1894, GNorm = 0.1215, lr_0 = 1.1595e-04
Loss = 1.0156e-03, PNorm = 154.1897, GNorm = 0.1506, lr_0 = 1.1587e-04
Loss = 8.3048e-04, PNorm = 154.1906, GNorm = 0.0795, lr_0 = 1.1579e-04
Loss = 7.8804e-04, PNorm = 154.1919, GNorm = 0.1082, lr_0 = 1.1571e-04
Loss = 1.0118e-03, PNorm = 154.1939, GNorm = 0.0729, lr_0 = 1.1563e-04
Loss = 5.0490e-04, PNorm = 154.1950, GNorm = 0.0250, lr_0 = 1.1555e-04
Loss = 2.8822e-03, PNorm = 154.1967, GNorm = 0.0272, lr_0 = 1.1547e-04
Loss = 1.6639e-03, PNorm = 154.1968, GNorm = 0.0570, lr_0 = 1.1539e-04
Loss = 1.0078e-03, PNorm = 154.1981, GNorm = 0.0372, lr_0 = 1.1531e-04
Loss = 5.4874e-04, PNorm = 154.1988, GNorm = 0.1003, lr_0 = 1.1523e-04
Loss = 5.1020e-04, PNorm = 154.1980, GNorm = 0.0358, lr_0 = 1.1515e-04
Loss = 7.9652e-04, PNorm = 154.1984, GNorm = 0.0183, lr_0 = 1.1508e-04
Loss = 7.9372e-04, PNorm = 154.2007, GNorm = 0.0588, lr_0 = 1.1500e-04
Loss = 8.5702e-04, PNorm = 154.2028, GNorm = 0.1163, lr_0 = 1.1492e-04
Loss = 6.2333e-04, PNorm = 154.2052, GNorm = 0.1722, lr_0 = 1.1484e-04
Loss = 1.5232e-03, PNorm = 154.2060, GNorm = 0.0412, lr_0 = 1.1476e-04
Loss = 1.1003e-03, PNorm = 154.2081, GNorm = 0.0775, lr_0 = 1.1468e-04
Loss = 9.5356e-04, PNorm = 154.2083, GNorm = 0.0378, lr_0 = 1.1460e-04
Loss = 2.1005e-03, PNorm = 154.2076, GNorm = 0.0686, lr_0 = 1.1452e-04
Loss = 2.7778e-03, PNorm = 154.2084, GNorm = 0.1109, lr_0 = 1.1445e-04
Loss = 6.3664e-04, PNorm = 154.2109, GNorm = 0.0763, lr_0 = 1.1437e-04
Loss = 4.5707e-04, PNorm = 154.2129, GNorm = 0.0514, lr_0 = 1.1429e-04
Loss = 8.7595e-04, PNorm = 154.2148, GNorm = 0.0224, lr_0 = 1.1421e-04
Loss = 6.6437e-04, PNorm = 154.2165, GNorm = 0.0376, lr_0 = 1.1413e-04
Loss = 9.6236e-04, PNorm = 154.2180, GNorm = 0.0253, lr_0 = 1.1405e-04
Loss = 5.3135e-04, PNorm = 154.2200, GNorm = 0.0659, lr_0 = 1.1398e-04
Loss = 1.1053e-03, PNorm = 154.2220, GNorm = 0.0882, lr_0 = 1.1390e-04
Loss = 1.6558e-03, PNorm = 154.2227, GNorm = 0.1551, lr_0 = 1.1382e-04
Loss = 9.6464e-04, PNorm = 154.2243, GNorm = 0.1390, lr_0 = 1.1374e-04
Loss = 8.0100e-04, PNorm = 154.2244, GNorm = 0.0274, lr_0 = 1.1366e-04
Loss = 1.2936e-03, PNorm = 154.2267, GNorm = 0.0658, lr_0 = 1.1359e-04
Loss = 1.6998e-03, PNorm = 154.2283, GNorm = 0.0720, lr_0 = 1.1351e-04
Loss = 1.4290e-03, PNorm = 154.2293, GNorm = 0.0265, lr_0 = 1.1343e-04
Loss = 2.8354e-03, PNorm = 154.2302, GNorm = 0.0639, lr_0 = 1.1335e-04
Loss = 3.5180e-03, PNorm = 154.2303, GNorm = 0.2070, lr_0 = 1.1328e-04
Loss = 6.2605e-04, PNorm = 154.2325, GNorm = 0.1505, lr_0 = 1.1320e-04
Loss = 5.6340e-04, PNorm = 154.2350, GNorm = 0.0467, lr_0 = 1.1312e-04
Loss = 4.8992e-04, PNorm = 154.2369, GNorm = 0.0624, lr_0 = 1.1304e-04
Loss = 7.2251e-04, PNorm = 154.2379, GNorm = 0.0432, lr_0 = 1.1297e-04
Loss = 1.4931e-03, PNorm = 154.2407, GNorm = 0.0938, lr_0 = 1.1289e-04
Loss = 2.2667e-03, PNorm = 154.2415, GNorm = 0.3527, lr_0 = 1.1281e-04
Loss = 7.0476e-04, PNorm = 154.2428, GNorm = 0.0343, lr_0 = 1.1273e-04
Loss = 5.5688e-04, PNorm = 154.2432, GNorm = 0.0483, lr_0 = 1.1266e-04
Loss = 5.2968e-04, PNorm = 154.2450, GNorm = 0.0507, lr_0 = 1.1258e-04
Loss = 5.8119e-04, PNorm = 154.2464, GNorm = 0.0214, lr_0 = 1.1250e-04
Loss = 9.1482e-04, PNorm = 154.2473, GNorm = 0.0660, lr_0 = 1.1243e-04
Loss = 1.6011e-03, PNorm = 154.2490, GNorm = 0.1708, lr_0 = 1.1235e-04
Loss = 9.8083e-04, PNorm = 154.2503, GNorm = 0.1051, lr_0 = 1.1227e-04
Loss = 1.0829e-03, PNorm = 154.2528, GNorm = 0.1461, lr_0 = 1.1219e-04
Loss = 5.1180e-04, PNorm = 154.2537, GNorm = 0.1743, lr_0 = 1.1212e-04
Loss = 1.2664e-03, PNorm = 154.2543, GNorm = 0.0527, lr_0 = 1.1204e-04
Loss = 4.8974e-04, PNorm = 154.2549, GNorm = 0.0972, lr_0 = 1.1196e-04
Loss = 7.7772e-04, PNorm = 154.2560, GNorm = 0.2578, lr_0 = 1.1189e-04
Loss = 1.5729e-03, PNorm = 154.2576, GNorm = 0.0601, lr_0 = 1.1181e-04
Loss = 1.4334e-03, PNorm = 154.2594, GNorm = 0.0833, lr_0 = 1.1173e-04
Loss = 2.7812e-03, PNorm = 154.2611, GNorm = 0.0971, lr_0 = 1.1166e-04
Loss = 1.0685e-03, PNorm = 154.2633, GNorm = 0.0387, lr_0 = 1.1158e-04
Loss = 1.0909e-03, PNorm = 154.2640, GNorm = 0.0884, lr_0 = 1.1150e-04
Loss = 1.2911e-03, PNorm = 154.2659, GNorm = 0.1541, lr_0 = 1.1143e-04
Loss = 2.0867e-03, PNorm = 154.2689, GNorm = 0.0302, lr_0 = 1.1135e-04
Loss = 9.2346e-04, PNorm = 154.2702, GNorm = 0.0510, lr_0 = 1.1128e-04
Loss = 4.6039e-04, PNorm = 154.2726, GNorm = 0.1038, lr_0 = 1.1120e-04
Loss = 9.6874e-04, PNorm = 154.2734, GNorm = 0.0476, lr_0 = 1.1112e-04
Loss = 1.1610e-03, PNorm = 154.2743, GNorm = 0.0591, lr_0 = 1.1105e-04
Loss = 2.2505e-03, PNorm = 154.2756, GNorm = 0.1174, lr_0 = 1.1097e-04
Loss = 5.9788e-04, PNorm = 154.2764, GNorm = 0.0393, lr_0 = 1.1089e-04
Loss = 4.5927e-04, PNorm = 154.2773, GNorm = 0.0651, lr_0 = 1.1082e-04
Loss = 1.1288e-03, PNorm = 154.2788, GNorm = 0.0700, lr_0 = 1.1074e-04
Loss = 5.1251e-04, PNorm = 154.2791, GNorm = 0.0288, lr_0 = 1.1067e-04
Loss = 1.3050e-03, PNorm = 154.2808, GNorm = 0.0323, lr_0 = 1.1059e-04
Loss = 1.8796e-03, PNorm = 154.2824, GNorm = 0.1477, lr_0 = 1.1052e-04
Loss = 7.4395e-04, PNorm = 154.2831, GNorm = 0.0574, lr_0 = 1.1044e-04
Loss = 5.8865e-04, PNorm = 154.2842, GNorm = 0.0406, lr_0 = 1.1036e-04
Loss = 1.1755e-03, PNorm = 154.2838, GNorm = 0.1589, lr_0 = 1.1029e-04
Loss = 1.1621e-03, PNorm = 154.2857, GNorm = 0.0985, lr_0 = 1.1021e-04
Loss = 1.8420e-03, PNorm = 154.2877, GNorm = 0.1093, lr_0 = 1.1014e-04
Loss = 3.7600e-03, PNorm = 154.2890, GNorm = 0.0869, lr_0 = 1.1006e-04
Loss = 1.5328e-03, PNorm = 154.2920, GNorm = 0.0558, lr_0 = 1.0999e-04
Loss = 6.0195e-04, PNorm = 154.2941, GNorm = 0.0442, lr_0 = 1.0991e-04
Loss = 5.3486e-04, PNorm = 154.2962, GNorm = 0.0379, lr_0 = 1.0984e-04
Loss = 1.3300e-03, PNorm = 154.2973, GNorm = 0.0642, lr_0 = 1.0976e-04
Loss = 1.3300e-03, PNorm = 154.2965, GNorm = 0.2767, lr_0 = 1.0969e-04
Loss = 1.9762e-03, PNorm = 154.2966, GNorm = 0.4045, lr_0 = 1.0961e-04
Loss = 7.0697e-04, PNorm = 154.2982, GNorm = 0.0846, lr_0 = 1.0954e-04
Loss = 1.0235e-03, PNorm = 154.2998, GNorm = 0.1920, lr_0 = 1.0946e-04
Loss = 8.4806e-04, PNorm = 154.3011, GNorm = 0.0368, lr_0 = 1.0939e-04
Loss = 8.0057e-04, PNorm = 154.3026, GNorm = 0.0594, lr_0 = 1.0931e-04
Loss = 1.1638e-03, PNorm = 154.3051, GNorm = 0.1088, lr_0 = 1.0924e-04
Loss = 9.6894e-04, PNorm = 154.3053, GNorm = 0.0511, lr_0 = 1.0916e-04
Loss = 1.1669e-03, PNorm = 154.3069, GNorm = 0.0997, lr_0 = 1.0909e-04
Loss = 4.7506e-04, PNorm = 154.3081, GNorm = 0.0636, lr_0 = 1.0901e-04
Loss = 1.0906e-03, PNorm = 154.3098, GNorm = 0.1020, lr_0 = 1.0894e-04
Loss = 1.3837e-03, PNorm = 154.3111, GNorm = 0.2707, lr_0 = 1.0886e-04
Loss = 1.0821e-03, PNorm = 154.3127, GNorm = 0.1148, lr_0 = 1.0879e-04
Loss = 2.2032e-03, PNorm = 154.3153, GNorm = 0.1546, lr_0 = 1.0871e-04
Loss = 3.9138e-03, PNorm = 154.3170, GNorm = 0.2225, lr_0 = 1.0864e-04
Loss = 6.0192e-04, PNorm = 154.3203, GNorm = 0.0612, lr_0 = 1.0856e-04
Validation mae = 0.475394
Epoch 29
Loss = 7.4893e-04, PNorm = 154.3221, GNorm = 0.0665, lr_0 = 1.0849e-04
Loss = 1.0560e-03, PNorm = 154.3230, GNorm = 0.0614, lr_0 = 1.0841e-04
Loss = 3.8671e-04, PNorm = 154.3233, GNorm = 0.0837, lr_0 = 1.0834e-04
Loss = 6.7484e-04, PNorm = 154.3221, GNorm = 0.0728, lr_0 = 1.0827e-04
Loss = 1.1338e-03, PNorm = 154.3223, GNorm = 0.0298, lr_0 = 1.0819e-04
Loss = 5.3169e-04, PNorm = 154.3229, GNorm = 0.0646, lr_0 = 1.0812e-04
Loss = 1.6546e-03, PNorm = 154.3228, GNorm = 0.3072, lr_0 = 1.0804e-04
Loss = 1.1612e-03, PNorm = 154.3246, GNorm = 0.2023, lr_0 = 1.0797e-04
Loss = 1.2110e-03, PNorm = 154.3256, GNorm = 0.0769, lr_0 = 1.0790e-04
Loss = 1.1672e-03, PNorm = 154.3266, GNorm = 0.0927, lr_0 = 1.0782e-04
Loss = 1.0210e-03, PNorm = 154.3274, GNorm = 0.0705, lr_0 = 1.0775e-04
Loss = 1.2808e-03, PNorm = 154.3278, GNorm = 0.0329, lr_0 = 1.0767e-04
Loss = 1.3150e-03, PNorm = 154.3284, GNorm = 0.1618, lr_0 = 1.0760e-04
Loss = 7.3546e-04, PNorm = 154.3268, GNorm = 0.1027, lr_0 = 1.0753e-04
Loss = 5.1807e-04, PNorm = 154.3272, GNorm = 0.0602, lr_0 = 1.0745e-04
Loss = 1.2371e-03, PNorm = 154.3284, GNorm = 0.0721, lr_0 = 1.0738e-04
Loss = 1.6723e-03, PNorm = 154.3300, GNorm = 0.0682, lr_0 = 1.0731e-04
Loss = 3.8467e-03, PNorm = 154.3320, GNorm = 0.0923, lr_0 = 1.0723e-04
Loss = 9.3218e-04, PNorm = 154.3337, GNorm = 0.0629, lr_0 = 1.0716e-04
Loss = 1.3694e-03, PNorm = 154.3350, GNorm = 0.0560, lr_0 = 1.0709e-04
Loss = 9.1396e-04, PNorm = 154.3355, GNorm = 0.0785, lr_0 = 1.0701e-04
Loss = 8.8813e-04, PNorm = 154.3385, GNorm = 0.1075, lr_0 = 1.0694e-04
Loss = 8.1169e-04, PNorm = 154.3393, GNorm = 0.0734, lr_0 = 1.0687e-04
Loss = 1.4237e-03, PNorm = 154.3403, GNorm = 0.0780, lr_0 = 1.0679e-04
Loss = 5.1315e-04, PNorm = 154.3414, GNorm = 0.0799, lr_0 = 1.0672e-04
Loss = 1.0803e-03, PNorm = 154.3422, GNorm = 0.0726, lr_0 = 1.0665e-04
Loss = 5.4045e-04, PNorm = 154.3439, GNorm = 0.0811, lr_0 = 1.0657e-04
Loss = 5.4831e-04, PNorm = 154.3447, GNorm = 0.0415, lr_0 = 1.0650e-04
Loss = 4.4902e-04, PNorm = 154.3458, GNorm = 0.0187, lr_0 = 1.0643e-04
Loss = 4.9455e-04, PNorm = 154.3472, GNorm = 0.0465, lr_0 = 1.0635e-04
Loss = 1.0367e-03, PNorm = 154.3486, GNorm = 0.3431, lr_0 = 1.0628e-04
Loss = 1.4969e-03, PNorm = 154.3493, GNorm = 0.0220, lr_0 = 1.0621e-04
Loss = 7.0968e-04, PNorm = 154.3501, GNorm = 0.0426, lr_0 = 1.0614e-04
Loss = 6.9154e-04, PNorm = 154.3514, GNorm = 0.0273, lr_0 = 1.0606e-04
Loss = 4.5992e-04, PNorm = 154.3510, GNorm = 0.0689, lr_0 = 1.0599e-04
Loss = 1.6543e-03, PNorm = 154.3514, GNorm = 0.0958, lr_0 = 1.0592e-04
Loss = 6.3059e-04, PNorm = 154.3520, GNorm = 0.0715, lr_0 = 1.0585e-04
Loss = 6.1348e-04, PNorm = 154.3533, GNorm = 0.1330, lr_0 = 1.0577e-04
Loss = 9.5238e-04, PNorm = 154.3544, GNorm = 0.0759, lr_0 = 1.0570e-04
Loss = 3.1351e-03, PNorm = 154.3557, GNorm = 0.1609, lr_0 = 1.0563e-04
Loss = 1.8569e-03, PNorm = 154.3577, GNorm = 0.1321, lr_0 = 1.0556e-04
Loss = 1.1815e-03, PNorm = 154.3599, GNorm = 0.0958, lr_0 = 1.0548e-04
Loss = 1.7786e-03, PNorm = 154.3611, GNorm = 0.0378, lr_0 = 1.0541e-04
Loss = 2.0636e-03, PNorm = 154.3613, GNorm = 0.1211, lr_0 = 1.0534e-04
Loss = 5.4057e-04, PNorm = 154.3614, GNorm = 0.0792, lr_0 = 1.0527e-04
Loss = 2.0223e-03, PNorm = 154.3620, GNorm = 0.1214, lr_0 = 1.0519e-04
Loss = 5.4361e-04, PNorm = 154.3624, GNorm = 0.0922, lr_0 = 1.0512e-04
Loss = 6.8984e-04, PNorm = 154.3640, GNorm = 0.1449, lr_0 = 1.0505e-04
Loss = 1.9003e-03, PNorm = 154.3664, GNorm = 0.1401, lr_0 = 1.0498e-04
Loss = 7.8949e-04, PNorm = 154.3685, GNorm = 0.1441, lr_0 = 1.0491e-04
Loss = 2.3267e-03, PNorm = 154.3703, GNorm = 0.0885, lr_0 = 1.0483e-04
Loss = 7.8957e-04, PNorm = 154.3730, GNorm = 0.0993, lr_0 = 1.0476e-04
Loss = 3.8056e-04, PNorm = 154.3744, GNorm = 0.1063, lr_0 = 1.0469e-04
Loss = 1.4832e-03, PNorm = 154.3759, GNorm = 0.0873, lr_0 = 1.0462e-04
Loss = 9.2354e-04, PNorm = 154.3773, GNorm = 0.0358, lr_0 = 1.0455e-04
Loss = 1.1172e-03, PNorm = 154.3769, GNorm = 0.0485, lr_0 = 1.0448e-04
Loss = 8.4653e-04, PNorm = 154.3778, GNorm = 0.0834, lr_0 = 1.0440e-04
Loss = 6.0928e-04, PNorm = 154.3790, GNorm = 0.0520, lr_0 = 1.0433e-04
Loss = 5.3506e-04, PNorm = 154.3797, GNorm = 0.1009, lr_0 = 1.0426e-04
Loss = 7.5286e-04, PNorm = 154.3809, GNorm = 0.0359, lr_0 = 1.0419e-04
Loss = 4.5260e-04, PNorm = 154.3826, GNorm = 0.0540, lr_0 = 1.0412e-04
Loss = 4.3986e-04, PNorm = 154.3839, GNorm = 0.0534, lr_0 = 1.0405e-04
Loss = 4.9041e-04, PNorm = 154.3848, GNorm = 0.0401, lr_0 = 1.0398e-04
Loss = 5.2407e-04, PNorm = 154.3842, GNorm = 0.0319, lr_0 = 1.0391e-04
Loss = 1.6683e-03, PNorm = 154.3828, GNorm = 0.0954, lr_0 = 1.0383e-04
Loss = 6.1366e-04, PNorm = 154.3849, GNorm = 0.0545, lr_0 = 1.0376e-04
Loss = 6.3631e-04, PNorm = 154.3864, GNorm = 0.0790, lr_0 = 1.0369e-04
Loss = 3.4314e-03, PNorm = 154.3872, GNorm = 0.1075, lr_0 = 1.0362e-04
Loss = 1.3446e-03, PNorm = 154.3883, GNorm = 0.0955, lr_0 = 1.0355e-04
Loss = 9.0529e-04, PNorm = 154.3889, GNorm = 0.0578, lr_0 = 1.0348e-04
Loss = 4.0992e-04, PNorm = 154.3890, GNorm = 0.0181, lr_0 = 1.0341e-04
Loss = 8.0167e-04, PNorm = 154.3897, GNorm = 0.0661, lr_0 = 1.0334e-04
Loss = 1.7001e-03, PNorm = 154.3911, GNorm = 0.2260, lr_0 = 1.0327e-04
Loss = 1.0517e-03, PNorm = 154.3914, GNorm = 0.0464, lr_0 = 1.0320e-04
Loss = 4.1608e-04, PNorm = 154.3925, GNorm = 0.0911, lr_0 = 1.0312e-04
Loss = 3.9218e-03, PNorm = 154.3946, GNorm = 0.0738, lr_0 = 1.0305e-04
Loss = 4.2331e-04, PNorm = 154.3969, GNorm = 0.0868, lr_0 = 1.0298e-04
Loss = 1.4871e-03, PNorm = 154.3988, GNorm = 0.1502, lr_0 = 1.0291e-04
Loss = 1.5306e-03, PNorm = 154.4007, GNorm = 0.0844, lr_0 = 1.0284e-04
Loss = 3.8427e-04, PNorm = 154.4012, GNorm = 0.0221, lr_0 = 1.0277e-04
Loss = 2.5139e-03, PNorm = 154.4010, GNorm = 0.0799, lr_0 = 1.0270e-04
Loss = 2.5694e-03, PNorm = 154.4006, GNorm = 0.1508, lr_0 = 1.0263e-04
Loss = 2.0487e-03, PNorm = 154.4002, GNorm = 0.0330, lr_0 = 1.0256e-04
Loss = 4.4719e-04, PNorm = 154.4011, GNorm = 0.0425, lr_0 = 1.0249e-04
Loss = 2.2202e-03, PNorm = 154.4024, GNorm = 0.1636, lr_0 = 1.0242e-04
Loss = 1.5166e-03, PNorm = 154.4033, GNorm = 0.0802, lr_0 = 1.0235e-04
Loss = 4.5808e-04, PNorm = 154.4056, GNorm = 0.0385, lr_0 = 1.0228e-04
Loss = 1.5065e-03, PNorm = 154.4075, GNorm = 0.2032, lr_0 = 1.0221e-04
Loss = 9.1666e-04, PNorm = 154.4088, GNorm = 0.0782, lr_0 = 1.0214e-04
Loss = 8.3541e-04, PNorm = 154.4106, GNorm = 0.0626, lr_0 = 1.0207e-04
Loss = 4.5571e-04, PNorm = 154.4120, GNorm = 0.0526, lr_0 = 1.0200e-04
Loss = 5.4327e-04, PNorm = 154.4129, GNorm = 0.0701, lr_0 = 1.0193e-04
Loss = 1.4722e-03, PNorm = 154.4151, GNorm = 0.2134, lr_0 = 1.0186e-04
Loss = 1.3801e-03, PNorm = 154.4152, GNorm = 0.1988, lr_0 = 1.0179e-04
Loss = 1.3773e-03, PNorm = 154.4150, GNorm = 0.1315, lr_0 = 1.0172e-04
Loss = 1.4218e-03, PNorm = 154.4163, GNorm = 0.1603, lr_0 = 1.0165e-04
Loss = 8.3113e-04, PNorm = 154.4175, GNorm = 0.0553, lr_0 = 1.0158e-04
Loss = 5.7695e-04, PNorm = 154.4184, GNorm = 0.0791, lr_0 = 1.0151e-04
Loss = 1.7752e-03, PNorm = 154.4191, GNorm = 0.0428, lr_0 = 1.0144e-04
Loss = 9.8069e-04, PNorm = 154.4196, GNorm = 0.0488, lr_0 = 1.0137e-04
Loss = 1.1683e-03, PNorm = 154.4216, GNorm = 0.0831, lr_0 = 1.0130e-04
Loss = 5.9762e-04, PNorm = 154.4238, GNorm = 0.1027, lr_0 = 1.0123e-04
Loss = 1.8220e-03, PNorm = 154.4257, GNorm = 0.0792, lr_0 = 1.0116e-04
Loss = 4.4247e-04, PNorm = 154.4272, GNorm = 0.0242, lr_0 = 1.0110e-04
Loss = 6.7283e-04, PNorm = 154.4278, GNorm = 0.1315, lr_0 = 1.0103e-04
Loss = 1.0380e-03, PNorm = 154.4291, GNorm = 0.0331, lr_0 = 1.0096e-04
Loss = 1.3066e-03, PNorm = 154.4297, GNorm = 0.1310, lr_0 = 1.0089e-04
Loss = 4.9387e-04, PNorm = 154.4307, GNorm = 0.0423, lr_0 = 1.0082e-04
Loss = 1.6666e-03, PNorm = 154.4315, GNorm = 0.0374, lr_0 = 1.0075e-04
Loss = 5.7633e-04, PNorm = 154.4330, GNorm = 0.0789, lr_0 = 1.0068e-04
Loss = 6.4154e-04, PNorm = 154.4343, GNorm = 0.1200, lr_0 = 1.0061e-04
Loss = 9.0329e-04, PNorm = 154.4364, GNorm = 0.0415, lr_0 = 1.0054e-04
Loss = 5.4586e-04, PNorm = 154.4384, GNorm = 0.0495, lr_0 = 1.0047e-04
Loss = 9.8997e-04, PNorm = 154.4409, GNorm = 0.1030, lr_0 = 1.0041e-04
Loss = 1.1664e-03, PNorm = 154.4428, GNorm = 0.0383, lr_0 = 1.0034e-04
Loss = 1.1261e-03, PNorm = 154.4457, GNorm = 0.1261, lr_0 = 1.0027e-04
Loss = 3.7358e-04, PNorm = 154.4456, GNorm = 0.0198, lr_0 = 1.0020e-04
Loss = 4.0816e-04, PNorm = 154.4450, GNorm = 0.0390, lr_0 = 1.0013e-04
Loss = 1.2793e-03, PNorm = 154.4452, GNorm = 0.1880, lr_0 = 1.0006e-04
Loss = 5.3121e-04, PNorm = 154.4467, GNorm = 0.0768, lr_0 = 1.0000e-04
Validation mae = 0.475857
Model 0 best validation mae = 0.475394 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.454259
Ensemble test mae = 0.454259
Fold 8
Splitting data with seed 8
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 8.4109e-01, PNorm = 62.2446, GNorm = 2.5222, lr_0 = 1.0413e-04
Loss = 7.5742e-01, PNorm = 62.2575, GNorm = 1.7938, lr_0 = 1.0788e-04
Loss = 5.8110e-01, PNorm = 62.2714, GNorm = 3.1851, lr_0 = 1.1163e-04
Loss = 4.8942e-01, PNorm = 62.2829, GNorm = 3.1420, lr_0 = 1.1537e-04
Loss = 4.4059e-01, PNorm = 62.2927, GNorm = 2.6403, lr_0 = 1.1913e-04
Loss = 4.6554e-01, PNorm = 62.3016, GNorm = 2.0092, lr_0 = 1.2287e-04
Loss = 4.7096e-01, PNorm = 62.3096, GNorm = 1.7274, lr_0 = 1.2663e-04
Loss = 3.9222e-01, PNorm = 62.3206, GNorm = 2.5335, lr_0 = 1.3038e-04
Loss = 4.1186e-01, PNorm = 62.3293, GNorm = 2.9675, lr_0 = 1.3413e-04
Loss = 3.4110e-01, PNorm = 62.3375, GNorm = 1.7202, lr_0 = 1.3788e-04
Loss = 3.4596e-01, PNorm = 62.3468, GNorm = 1.6762, lr_0 = 1.4163e-04
Loss = 3.7172e-01, PNorm = 62.3553, GNorm = 3.1330, lr_0 = 1.4537e-04
Loss = 3.1159e-01, PNorm = 62.3637, GNorm = 1.6380, lr_0 = 1.4913e-04
Loss = 3.5171e-01, PNorm = 62.3738, GNorm = 1.8949, lr_0 = 1.5288e-04
Loss = 3.3685e-01, PNorm = 62.3838, GNorm = 2.9820, lr_0 = 1.5662e-04
Loss = 3.4519e-01, PNorm = 62.3925, GNorm = 3.9387, lr_0 = 1.6038e-04
Loss = 3.3718e-01, PNorm = 62.3996, GNorm = 2.0026, lr_0 = 1.6412e-04
Loss = 3.4756e-01, PNorm = 62.4102, GNorm = 1.6761, lr_0 = 1.6788e-04
Loss = 2.9974e-01, PNorm = 62.4224, GNorm = 2.0961, lr_0 = 1.7163e-04
Loss = 2.9987e-01, PNorm = 62.4344, GNorm = 1.9928, lr_0 = 1.7538e-04
Loss = 2.8785e-01, PNorm = 62.4449, GNorm = 1.6527, lr_0 = 1.7913e-04
Loss = 3.0041e-01, PNorm = 62.4578, GNorm = 2.0387, lr_0 = 1.8288e-04
Loss = 3.0612e-01, PNorm = 62.4703, GNorm = 1.4923, lr_0 = 1.8662e-04
Loss = 2.9131e-01, PNorm = 62.4814, GNorm = 1.4799, lr_0 = 1.9038e-04
Loss = 2.9955e-01, PNorm = 62.4973, GNorm = 3.6562, lr_0 = 1.9413e-04
Loss = 3.1054e-01, PNorm = 62.5120, GNorm = 1.9882, lr_0 = 1.9788e-04
Loss = 3.0687e-01, PNorm = 62.5244, GNorm = 1.3602, lr_0 = 2.0163e-04
Loss = 2.9870e-01, PNorm = 62.5384, GNorm = 1.2945, lr_0 = 2.0537e-04
Loss = 2.9116e-01, PNorm = 62.5514, GNorm = 1.6446, lr_0 = 2.0913e-04
Loss = 3.0925e-01, PNorm = 62.5659, GNorm = 2.1637, lr_0 = 2.1288e-04
Loss = 2.8673e-01, PNorm = 62.5816, GNorm = 1.8249, lr_0 = 2.1663e-04
Loss = 3.1944e-01, PNorm = 62.5990, GNorm = 1.5946, lr_0 = 2.2038e-04
Loss = 2.6881e-01, PNorm = 62.6179, GNorm = 1.5095, lr_0 = 2.2412e-04
Loss = 3.2074e-01, PNorm = 62.6342, GNorm = 1.5066, lr_0 = 2.2787e-04
Loss = 2.9152e-01, PNorm = 62.6529, GNorm = 1.4675, lr_0 = 2.3163e-04
Loss = 3.0577e-01, PNorm = 62.6692, GNorm = 1.8000, lr_0 = 2.3538e-04
Loss = 2.7099e-01, PNorm = 62.6885, GNorm = 2.0225, lr_0 = 2.3913e-04
Loss = 2.6692e-01, PNorm = 62.7064, GNorm = 1.1443, lr_0 = 2.4288e-04
Loss = 2.5586e-01, PNorm = 62.7236, GNorm = 2.2539, lr_0 = 2.4662e-04
Loss = 2.9567e-01, PNorm = 62.7438, GNorm = 1.3728, lr_0 = 2.5038e-04
Loss = 2.7442e-01, PNorm = 62.7655, GNorm = 1.1807, lr_0 = 2.5413e-04
Loss = 2.9000e-01, PNorm = 62.7843, GNorm = 1.3163, lr_0 = 2.5788e-04
Loss = 2.9029e-01, PNorm = 62.8083, GNorm = 1.1312, lr_0 = 2.6163e-04
Loss = 2.6297e-01, PNorm = 62.8255, GNorm = 1.5764, lr_0 = 2.6537e-04
Loss = 2.2499e-01, PNorm = 62.8430, GNorm = 0.9477, lr_0 = 2.6912e-04
Loss = 2.4698e-01, PNorm = 62.8607, GNorm = 1.1598, lr_0 = 2.7288e-04
Loss = 2.6018e-01, PNorm = 62.8784, GNorm = 1.3312, lr_0 = 2.7663e-04
Loss = 2.6581e-01, PNorm = 62.8971, GNorm = 1.3294, lr_0 = 2.8038e-04
Loss = 2.4232e-01, PNorm = 62.9171, GNorm = 1.1866, lr_0 = 2.8413e-04
Loss = 2.6036e-01, PNorm = 62.9383, GNorm = 1.3464, lr_0 = 2.8787e-04
Loss = 2.3855e-01, PNorm = 62.9592, GNorm = 1.2697, lr_0 = 2.9163e-04
Loss = 2.4386e-01, PNorm = 62.9779, GNorm = 1.4607, lr_0 = 2.9538e-04
Loss = 2.5574e-01, PNorm = 62.9995, GNorm = 1.0357, lr_0 = 2.9913e-04
Loss = 2.3839e-01, PNorm = 63.0249, GNorm = 1.4070, lr_0 = 3.0288e-04
Loss = 2.5319e-01, PNorm = 63.0484, GNorm = 1.7912, lr_0 = 3.0662e-04
Loss = 2.0411e-01, PNorm = 63.0725, GNorm = 1.1552, lr_0 = 3.1037e-04
Loss = 2.4843e-01, PNorm = 63.0922, GNorm = 1.5043, lr_0 = 3.1413e-04
Loss = 2.3210e-01, PNorm = 63.1158, GNorm = 1.2681, lr_0 = 3.1788e-04
Loss = 2.3589e-01, PNorm = 63.1379, GNorm = 1.5510, lr_0 = 3.2163e-04
Loss = 2.4305e-01, PNorm = 63.1642, GNorm = 0.9361, lr_0 = 3.2538e-04
Loss = 2.5838e-01, PNorm = 63.1930, GNorm = 1.6080, lr_0 = 3.2912e-04
Loss = 2.4800e-01, PNorm = 63.2251, GNorm = 1.4087, lr_0 = 3.3288e-04
Loss = 2.2478e-01, PNorm = 63.2548, GNorm = 1.6509, lr_0 = 3.3663e-04
Loss = 2.4461e-01, PNorm = 63.2851, GNorm = 1.1645, lr_0 = 3.4038e-04
Loss = 2.7235e-01, PNorm = 63.3200, GNorm = 1.2575, lr_0 = 3.4413e-04
Loss = 2.4983e-01, PNorm = 63.3545, GNorm = 1.1809, lr_0 = 3.4787e-04
Loss = 2.3763e-01, PNorm = 63.3865, GNorm = 1.4528, lr_0 = 3.5162e-04
Loss = 2.3810e-01, PNorm = 63.4176, GNorm = 1.2109, lr_0 = 3.5538e-04
Loss = 2.5539e-01, PNorm = 63.4527, GNorm = 1.7163, lr_0 = 3.5913e-04
Loss = 2.4626e-01, PNorm = 63.4834, GNorm = 1.5431, lr_0 = 3.6288e-04
Loss = 2.3572e-01, PNorm = 63.5160, GNorm = 1.5585, lr_0 = 3.6662e-04
Loss = 2.4911e-01, PNorm = 63.5501, GNorm = 1.4228, lr_0 = 3.7037e-04
Loss = 2.8138e-01, PNorm = 63.5806, GNorm = 1.1126, lr_0 = 3.7413e-04
Loss = 2.6666e-01, PNorm = 63.6199, GNorm = 1.1187, lr_0 = 3.7788e-04
Loss = 2.4553e-01, PNorm = 63.6567, GNorm = 1.6676, lr_0 = 3.8163e-04
Loss = 2.4435e-01, PNorm = 63.6948, GNorm = 1.3794, lr_0 = 3.8537e-04
Loss = 2.8677e-01, PNorm = 63.7316, GNorm = 1.5187, lr_0 = 3.8912e-04
Loss = 2.3843e-01, PNorm = 63.7676, GNorm = 0.9975, lr_0 = 3.9287e-04
Loss = 2.5069e-01, PNorm = 63.8061, GNorm = 1.3047, lr_0 = 3.9663e-04
Loss = 2.4077e-01, PNorm = 63.8392, GNorm = 0.9591, lr_0 = 4.0038e-04
Loss = 2.4961e-01, PNorm = 63.8782, GNorm = 1.2705, lr_0 = 4.0413e-04
Loss = 2.2733e-01, PNorm = 63.9143, GNorm = 1.0829, lr_0 = 4.0787e-04
Loss = 2.5103e-01, PNorm = 63.9530, GNorm = 1.4622, lr_0 = 4.1162e-04
Loss = 2.3477e-01, PNorm = 63.9924, GNorm = 1.2609, lr_0 = 4.1537e-04
Loss = 2.2387e-01, PNorm = 64.0367, GNorm = 1.1544, lr_0 = 4.1913e-04
Loss = 2.9305e-01, PNorm = 64.0710, GNorm = 1.1002, lr_0 = 4.2288e-04
Loss = 2.3752e-01, PNorm = 64.1117, GNorm = 1.2919, lr_0 = 4.2662e-04
Loss = 2.0846e-01, PNorm = 64.1517, GNorm = 0.7739, lr_0 = 4.3037e-04
Loss = 2.6231e-01, PNorm = 64.1863, GNorm = 1.2525, lr_0 = 4.3412e-04
Loss = 2.2799e-01, PNorm = 64.2295, GNorm = 1.0922, lr_0 = 4.3788e-04
Loss = 2.4505e-01, PNorm = 64.2667, GNorm = 1.0160, lr_0 = 4.4163e-04
Loss = 2.4654e-01, PNorm = 64.3133, GNorm = 1.0242, lr_0 = 4.4538e-04
Loss = 2.3999e-01, PNorm = 64.3570, GNorm = 1.2027, lr_0 = 4.4912e-04
Loss = 2.6110e-01, PNorm = 64.3905, GNorm = 0.7942, lr_0 = 4.5287e-04
Loss = 2.4809e-01, PNorm = 64.4365, GNorm = 1.2117, lr_0 = 4.5662e-04
Loss = 2.4015e-01, PNorm = 64.4745, GNorm = 1.0697, lr_0 = 4.6038e-04
Loss = 2.1207e-01, PNorm = 64.5106, GNorm = 1.1616, lr_0 = 4.6413e-04
Loss = 2.1844e-01, PNorm = 64.5496, GNorm = 0.9001, lr_0 = 4.6787e-04
Loss = 2.2031e-01, PNorm = 64.5961, GNorm = 1.0469, lr_0 = 4.7162e-04
Loss = 2.2880e-01, PNorm = 64.6369, GNorm = 1.1189, lr_0 = 4.7537e-04
Loss = 2.0812e-01, PNorm = 64.6824, GNorm = 0.7567, lr_0 = 4.7913e-04
Loss = 2.1372e-01, PNorm = 64.7260, GNorm = 1.1503, lr_0 = 4.8288e-04
Loss = 2.1246e-01, PNorm = 64.7753, GNorm = 0.8317, lr_0 = 4.8663e-04
Loss = 2.4701e-01, PNorm = 64.8170, GNorm = 1.1752, lr_0 = 4.9038e-04
Loss = 2.0475e-01, PNorm = 64.8703, GNorm = 1.0781, lr_0 = 4.9412e-04
Loss = 2.1079e-01, PNorm = 64.9250, GNorm = 1.4121, lr_0 = 4.9788e-04
Loss = 2.5001e-01, PNorm = 64.9642, GNorm = 1.2822, lr_0 = 5.0163e-04
Loss = 2.5694e-01, PNorm = 65.0218, GNorm = 1.0077, lr_0 = 5.0538e-04
Loss = 2.3760e-01, PNorm = 65.0678, GNorm = 1.2404, lr_0 = 5.0913e-04
Loss = 1.9581e-01, PNorm = 65.1186, GNorm = 1.1488, lr_0 = 5.1287e-04
Loss = 2.2160e-01, PNorm = 65.1655, GNorm = 0.8718, lr_0 = 5.1663e-04
Loss = 2.0719e-01, PNorm = 65.2090, GNorm = 0.9375, lr_0 = 5.2038e-04
Loss = 2.1360e-01, PNorm = 65.2542, GNorm = 0.7275, lr_0 = 5.2413e-04
Loss = 2.0827e-01, PNorm = 65.3013, GNorm = 0.8932, lr_0 = 5.2788e-04
Loss = 2.1175e-01, PNorm = 65.3486, GNorm = 0.7091, lr_0 = 5.3162e-04
Loss = 2.1050e-01, PNorm = 65.3949, GNorm = 1.1592, lr_0 = 5.3538e-04
Loss = 1.9391e-01, PNorm = 65.4466, GNorm = 1.2149, lr_0 = 5.3912e-04
Loss = 2.3292e-01, PNorm = 65.4898, GNorm = 0.8986, lr_0 = 5.4288e-04
Loss = 2.3606e-01, PNorm = 65.5503, GNorm = 1.7404, lr_0 = 5.4663e-04
Loss = 2.1951e-01, PNorm = 65.6014, GNorm = 0.8275, lr_0 = 5.5038e-04
Validation mae = 0.549150
Epoch 1
Loss = 1.6738e-01, PNorm = 65.6576, GNorm = 0.6944, lr_0 = 5.5413e-04
Loss = 1.7595e-01, PNorm = 65.7172, GNorm = 0.7637, lr_0 = 5.5787e-04
Loss = 1.5839e-01, PNorm = 65.7690, GNorm = 1.0482, lr_0 = 5.6163e-04
Loss = 1.7428e-01, PNorm = 65.8230, GNorm = 1.0799, lr_0 = 5.6538e-04
Loss = 1.4349e-01, PNorm = 65.8771, GNorm = 0.9422, lr_0 = 5.6913e-04
Loss = 1.5803e-01, PNorm = 65.9182, GNorm = 0.6460, lr_0 = 5.7288e-04
Loss = 1.3835e-01, PNorm = 65.9692, GNorm = 0.8658, lr_0 = 5.7662e-04
Loss = 1.5738e-01, PNorm = 66.0181, GNorm = 1.0751, lr_0 = 5.8038e-04
Loss = 1.3487e-01, PNorm = 66.0694, GNorm = 0.9165, lr_0 = 5.8413e-04
Loss = 1.2773e-01, PNorm = 66.1136, GNorm = 0.7819, lr_0 = 5.8788e-04
Loss = 1.2924e-01, PNorm = 66.1591, GNorm = 0.6851, lr_0 = 5.9163e-04
Loss = 1.4212e-01, PNorm = 66.2067, GNorm = 0.7596, lr_0 = 5.9538e-04
Loss = 1.5441e-01, PNorm = 66.2555, GNorm = 0.7751, lr_0 = 5.9913e-04
Loss = 1.4452e-01, PNorm = 66.3253, GNorm = 1.0787, lr_0 = 6.0288e-04
Loss = 1.3402e-01, PNorm = 66.3773, GNorm = 0.7407, lr_0 = 6.0663e-04
Loss = 1.4635e-01, PNorm = 66.4439, GNorm = 0.9113, lr_0 = 6.1038e-04
Loss = 1.3779e-01, PNorm = 66.5030, GNorm = 0.6434, lr_0 = 6.1413e-04
Loss = 1.4259e-01, PNorm = 66.5579, GNorm = 0.7945, lr_0 = 6.1788e-04
Loss = 1.4848e-01, PNorm = 66.6172, GNorm = 1.0411, lr_0 = 6.2163e-04
Loss = 1.5090e-01, PNorm = 66.6862, GNorm = 0.8011, lr_0 = 6.2538e-04
Loss = 1.6787e-01, PNorm = 66.7589, GNorm = 1.2104, lr_0 = 6.2913e-04
Loss = 1.6257e-01, PNorm = 66.8303, GNorm = 0.9130, lr_0 = 6.3288e-04
Loss = 1.3847e-01, PNorm = 66.9048, GNorm = 0.7648, lr_0 = 6.3663e-04
Loss = 1.6141e-01, PNorm = 66.9800, GNorm = 0.8023, lr_0 = 6.4038e-04
Loss = 1.6079e-01, PNorm = 67.0501, GNorm = 0.9101, lr_0 = 6.4413e-04
Loss = 1.4756e-01, PNorm = 67.1255, GNorm = 0.8759, lr_0 = 6.4788e-04
Loss = 1.6555e-01, PNorm = 67.1948, GNorm = 0.6257, lr_0 = 6.5163e-04
Loss = 1.4527e-01, PNorm = 67.2696, GNorm = 1.0627, lr_0 = 6.5538e-04
Loss = 1.6068e-01, PNorm = 67.3418, GNorm = 1.1761, lr_0 = 6.5913e-04
Loss = 1.5671e-01, PNorm = 67.4248, GNorm = 0.8088, lr_0 = 6.6288e-04
Loss = 1.5218e-01, PNorm = 67.5032, GNorm = 0.7649, lr_0 = 6.6663e-04
Loss = 1.7824e-01, PNorm = 67.5912, GNorm = 1.3713, lr_0 = 6.7038e-04
Loss = 1.5644e-01, PNorm = 67.6794, GNorm = 0.7253, lr_0 = 6.7413e-04
Loss = 1.4120e-01, PNorm = 67.7722, GNorm = 1.1744, lr_0 = 6.7788e-04
Loss = 1.5927e-01, PNorm = 67.8488, GNorm = 0.7134, lr_0 = 6.8163e-04
Loss = 1.7325e-01, PNorm = 67.9329, GNorm = 0.9038, lr_0 = 6.8538e-04
Loss = 1.4117e-01, PNorm = 68.0187, GNorm = 0.9564, lr_0 = 6.8913e-04
Loss = 1.7503e-01, PNorm = 68.0910, GNorm = 0.9105, lr_0 = 6.9288e-04
Loss = 1.5788e-01, PNorm = 68.1764, GNorm = 1.0196, lr_0 = 6.9663e-04
Loss = 1.6465e-01, PNorm = 68.2560, GNorm = 0.8825, lr_0 = 7.0038e-04
Loss = 1.7388e-01, PNorm = 68.3446, GNorm = 0.5437, lr_0 = 7.0413e-04
Loss = 1.6201e-01, PNorm = 68.4386, GNorm = 0.7134, lr_0 = 7.0788e-04
Loss = 1.6632e-01, PNorm = 68.5185, GNorm = 0.6843, lr_0 = 7.1163e-04
Loss = 1.8370e-01, PNorm = 68.6057, GNorm = 0.7345, lr_0 = 7.1538e-04
Loss = 1.5133e-01, PNorm = 68.6972, GNorm = 0.7925, lr_0 = 7.1913e-04
Loss = 1.9022e-01, PNorm = 68.7891, GNorm = 0.7077, lr_0 = 7.2288e-04
Loss = 1.6040e-01, PNorm = 68.8740, GNorm = 0.9264, lr_0 = 7.2663e-04
Loss = 1.6070e-01, PNorm = 68.9654, GNorm = 1.4335, lr_0 = 7.3038e-04
Loss = 1.8422e-01, PNorm = 69.0580, GNorm = 0.8012, lr_0 = 7.3413e-04
Loss = 1.8384e-01, PNorm = 69.1605, GNorm = 1.1637, lr_0 = 7.3788e-04
Loss = 1.6304e-01, PNorm = 69.2612, GNorm = 1.4619, lr_0 = 7.4163e-04
Loss = 1.7166e-01, PNorm = 69.3496, GNorm = 0.8566, lr_0 = 7.4538e-04
Loss = 1.3837e-01, PNorm = 69.4402, GNorm = 0.7018, lr_0 = 7.4913e-04
Loss = 1.6821e-01, PNorm = 69.5160, GNorm = 0.7425, lr_0 = 7.5288e-04
Loss = 1.9530e-01, PNorm = 69.6134, GNorm = 0.7510, lr_0 = 7.5663e-04
Loss = 1.6467e-01, PNorm = 69.7116, GNorm = 0.7402, lr_0 = 7.6038e-04
Loss = 1.9285e-01, PNorm = 69.8105, GNorm = 0.6790, lr_0 = 7.6413e-04
Loss = 1.7876e-01, PNorm = 69.9189, GNorm = 1.1039, lr_0 = 7.6788e-04
Loss = 1.7219e-01, PNorm = 70.0241, GNorm = 0.7854, lr_0 = 7.7163e-04
Loss = 1.7002e-01, PNorm = 70.1188, GNorm = 0.7466, lr_0 = 7.7538e-04
Loss = 1.5632e-01, PNorm = 70.2177, GNorm = 1.1303, lr_0 = 7.7913e-04
Loss = 1.8032e-01, PNorm = 70.3128, GNorm = 1.1018, lr_0 = 7.8288e-04
Loss = 1.4396e-01, PNorm = 70.4125, GNorm = 0.7025, lr_0 = 7.8663e-04
Loss = 1.7485e-01, PNorm = 70.5131, GNorm = 1.0545, lr_0 = 7.9038e-04
Loss = 1.6479e-01, PNorm = 70.6137, GNorm = 0.6657, lr_0 = 7.9413e-04
Loss = 1.9800e-01, PNorm = 70.7219, GNorm = 1.1409, lr_0 = 7.9788e-04
Loss = 1.8000e-01, PNorm = 70.8274, GNorm = 0.5581, lr_0 = 8.0163e-04
Loss = 1.5366e-01, PNorm = 70.9289, GNorm = 1.3500, lr_0 = 8.0538e-04
Loss = 1.6225e-01, PNorm = 71.0226, GNorm = 0.6548, lr_0 = 8.0913e-04
Loss = 1.6901e-01, PNorm = 71.1270, GNorm = 0.7492, lr_0 = 8.1288e-04
Loss = 1.8007e-01, PNorm = 71.2197, GNorm = 1.2880, lr_0 = 8.1663e-04
Loss = 1.5259e-01, PNorm = 71.3266, GNorm = 0.9074, lr_0 = 8.2038e-04
Loss = 1.7581e-01, PNorm = 71.4181, GNorm = 1.0470, lr_0 = 8.2413e-04
Loss = 1.6273e-01, PNorm = 71.5250, GNorm = 0.6705, lr_0 = 8.2788e-04
Loss = 1.7339e-01, PNorm = 71.6194, GNorm = 0.7177, lr_0 = 8.3163e-04
Loss = 1.6563e-01, PNorm = 71.7298, GNorm = 0.8748, lr_0 = 8.3538e-04
Loss = 1.6673e-01, PNorm = 71.8333, GNorm = 0.7121, lr_0 = 8.3913e-04
Loss = 1.7789e-01, PNorm = 71.9417, GNorm = 0.6952, lr_0 = 8.4288e-04
Loss = 1.6024e-01, PNorm = 72.0465, GNorm = 0.8892, lr_0 = 8.4663e-04
Loss = 1.8133e-01, PNorm = 72.1632, GNorm = 1.1100, lr_0 = 8.5038e-04
Loss = 1.6943e-01, PNorm = 72.2682, GNorm = 0.6621, lr_0 = 8.5413e-04
Loss = 1.7889e-01, PNorm = 72.3804, GNorm = 1.2112, lr_0 = 8.5788e-04
Loss = 1.8720e-01, PNorm = 72.5056, GNorm = 0.6377, lr_0 = 8.6163e-04
Loss = 1.7331e-01, PNorm = 72.6275, GNorm = 0.7691, lr_0 = 8.6538e-04
Loss = 1.7375e-01, PNorm = 72.7415, GNorm = 0.8911, lr_0 = 8.6913e-04
Loss = 1.8351e-01, PNorm = 72.8486, GNorm = 1.1194, lr_0 = 8.7288e-04
Loss = 1.8918e-01, PNorm = 72.9654, GNorm = 1.2979, lr_0 = 8.7663e-04
Loss = 1.7544e-01, PNorm = 73.0720, GNorm = 0.8472, lr_0 = 8.8038e-04
Loss = 1.6785e-01, PNorm = 73.1903, GNorm = 1.0848, lr_0 = 8.8413e-04
Loss = 1.8538e-01, PNorm = 73.2963, GNorm = 0.9516, lr_0 = 8.8788e-04
Loss = 1.6861e-01, PNorm = 73.4219, GNorm = 1.2300, lr_0 = 8.9163e-04
Loss = 1.7584e-01, PNorm = 73.5367, GNorm = 0.8851, lr_0 = 8.9538e-04
Loss = 1.7748e-01, PNorm = 73.6604, GNorm = 0.7486, lr_0 = 8.9913e-04
Loss = 1.9750e-01, PNorm = 73.7735, GNorm = 0.9283, lr_0 = 9.0288e-04
Loss = 1.7869e-01, PNorm = 73.9096, GNorm = 0.7689, lr_0 = 9.0663e-04
Loss = 1.8344e-01, PNorm = 74.0261, GNorm = 1.1884, lr_0 = 9.1038e-04
Loss = 2.0160e-01, PNorm = 74.1629, GNorm = 1.4922, lr_0 = 9.1413e-04
Loss = 1.8339e-01, PNorm = 74.2978, GNorm = 0.8697, lr_0 = 9.1788e-04
Loss = 1.7015e-01, PNorm = 74.4275, GNorm = 0.8870, lr_0 = 9.2163e-04
Loss = 1.5974e-01, PNorm = 74.5420, GNorm = 0.6438, lr_0 = 9.2538e-04
Loss = 1.5834e-01, PNorm = 74.6688, GNorm = 0.6476, lr_0 = 9.2913e-04
Loss = 1.9159e-01, PNorm = 74.7968, GNorm = 0.6882, lr_0 = 9.3288e-04
Loss = 1.5886e-01, PNorm = 74.9282, GNorm = 0.7055, lr_0 = 9.3663e-04
Loss = 1.6310e-01, PNorm = 75.0493, GNorm = 0.6613, lr_0 = 9.4038e-04
Loss = 2.0453e-01, PNorm = 75.1806, GNorm = 1.1971, lr_0 = 9.4413e-04
Loss = 1.7965e-01, PNorm = 75.3018, GNorm = 0.7458, lr_0 = 9.4788e-04
Loss = 1.6288e-01, PNorm = 75.4384, GNorm = 0.6728, lr_0 = 9.5163e-04
Loss = 1.4845e-01, PNorm = 75.5605, GNorm = 0.8872, lr_0 = 9.5538e-04
Loss = 1.7198e-01, PNorm = 75.6729, GNorm = 0.7579, lr_0 = 9.5913e-04
Loss = 1.7459e-01, PNorm = 75.7936, GNorm = 1.1908, lr_0 = 9.6288e-04
Loss = 2.0429e-01, PNorm = 75.9157, GNorm = 0.6561, lr_0 = 9.6663e-04
Loss = 1.7178e-01, PNorm = 76.0440, GNorm = 0.9386, lr_0 = 9.7038e-04
Loss = 1.8272e-01, PNorm = 76.1805, GNorm = 0.7896, lr_0 = 9.7413e-04
Loss = 1.7183e-01, PNorm = 76.3004, GNorm = 0.7533, lr_0 = 9.7788e-04
Loss = 1.9752e-01, PNorm = 76.4289, GNorm = 0.8872, lr_0 = 9.8163e-04
Loss = 1.5788e-01, PNorm = 76.5609, GNorm = 1.1616, lr_0 = 9.8537e-04
Loss = 1.7079e-01, PNorm = 76.6866, GNorm = 0.7443, lr_0 = 9.8912e-04
Loss = 1.7732e-01, PNorm = 76.8063, GNorm = 0.7902, lr_0 = 9.9288e-04
Loss = 1.9313e-01, PNorm = 76.9223, GNorm = 0.9778, lr_0 = 9.9663e-04
Loss = 1.8777e-01, PNorm = 77.0652, GNorm = 0.5876, lr_0 = 9.9993e-04
Validation mae = 0.528930
Epoch 2
Loss = 1.1640e-01, PNorm = 77.1958, GNorm = 0.6818, lr_0 = 9.9925e-04
Loss = 1.0038e-01, PNorm = 77.3055, GNorm = 0.6787, lr_0 = 9.9856e-04
Loss = 1.2179e-01, PNorm = 77.4012, GNorm = 0.6622, lr_0 = 9.9788e-04
Loss = 1.0627e-01, PNorm = 77.4969, GNorm = 0.5465, lr_0 = 9.9719e-04
Loss = 1.0102e-01, PNorm = 77.5955, GNorm = 0.4872, lr_0 = 9.9651e-04
Loss = 1.0849e-01, PNorm = 77.6716, GNorm = 0.6207, lr_0 = 9.9583e-04
Loss = 1.1248e-01, PNorm = 77.7646, GNorm = 0.6624, lr_0 = 9.9515e-04
Loss = 1.0466e-01, PNorm = 77.8553, GNorm = 0.6328, lr_0 = 9.9446e-04
Loss = 1.2012e-01, PNorm = 77.9505, GNorm = 0.5273, lr_0 = 9.9378e-04
Loss = 1.0408e-01, PNorm = 78.0380, GNorm = 0.4811, lr_0 = 9.9310e-04
Loss = 1.2163e-01, PNorm = 78.1449, GNorm = 0.8165, lr_0 = 9.9242e-04
Loss = 1.3101e-01, PNorm = 78.2416, GNorm = 0.7967, lr_0 = 9.9174e-04
Loss = 1.0951e-01, PNorm = 78.3571, GNorm = 0.9587, lr_0 = 9.9106e-04
Loss = 1.1970e-01, PNorm = 78.4561, GNorm = 0.4813, lr_0 = 9.9038e-04
Loss = 1.0451e-01, PNorm = 78.5547, GNorm = 0.5777, lr_0 = 9.8971e-04
Loss = 1.0075e-01, PNorm = 78.6564, GNorm = 0.5913, lr_0 = 9.8903e-04
Loss = 1.1141e-01, PNorm = 78.7656, GNorm = 0.8958, lr_0 = 9.8835e-04
Loss = 1.2384e-01, PNorm = 78.8693, GNorm = 1.1703, lr_0 = 9.8767e-04
Loss = 1.0119e-01, PNorm = 78.9746, GNorm = 0.5237, lr_0 = 9.8700e-04
Loss = 1.0092e-01, PNorm = 79.0737, GNorm = 0.8211, lr_0 = 9.8632e-04
Loss = 1.1882e-01, PNorm = 79.1743, GNorm = 0.4744, lr_0 = 9.8564e-04
Loss = 1.0530e-01, PNorm = 79.2886, GNorm = 0.5846, lr_0 = 9.8497e-04
Loss = 1.1639e-01, PNorm = 79.3968, GNorm = 0.6221, lr_0 = 9.8429e-04
Loss = 9.9964e-02, PNorm = 79.5075, GNorm = 0.5057, lr_0 = 9.8362e-04
Loss = 9.2259e-02, PNorm = 79.6055, GNorm = 0.4460, lr_0 = 9.8295e-04
Loss = 9.7638e-02, PNorm = 79.7074, GNorm = 0.7112, lr_0 = 9.8227e-04
Loss = 9.8747e-02, PNorm = 79.7941, GNorm = 0.4540, lr_0 = 9.8160e-04
Loss = 1.0422e-01, PNorm = 79.8786, GNorm = 0.5295, lr_0 = 9.8093e-04
Loss = 9.9715e-02, PNorm = 79.9730, GNorm = 0.5351, lr_0 = 9.8026e-04
Loss = 1.1151e-01, PNorm = 80.0760, GNorm = 0.5595, lr_0 = 9.7958e-04
Loss = 1.1347e-01, PNorm = 80.1859, GNorm = 0.8948, lr_0 = 9.7891e-04
Loss = 1.0365e-01, PNorm = 80.2897, GNorm = 0.6358, lr_0 = 9.7824e-04
Loss = 1.1259e-01, PNorm = 80.3873, GNorm = 0.3382, lr_0 = 9.7757e-04
Loss = 1.1351e-01, PNorm = 80.5028, GNorm = 0.4864, lr_0 = 9.7690e-04
Loss = 1.2039e-01, PNorm = 80.6192, GNorm = 0.5365, lr_0 = 9.7623e-04
Loss = 1.0492e-01, PNorm = 80.7259, GNorm = 0.5638, lr_0 = 9.7556e-04
Loss = 1.1085e-01, PNorm = 80.8424, GNorm = 0.7050, lr_0 = 9.7490e-04
Loss = 1.0835e-01, PNorm = 80.9531, GNorm = 0.6889, lr_0 = 9.7423e-04
Loss = 1.2540e-01, PNorm = 81.0578, GNorm = 0.9672, lr_0 = 9.7356e-04
Loss = 1.0507e-01, PNorm = 81.1565, GNorm = 0.4694, lr_0 = 9.7289e-04
Loss = 1.1799e-01, PNorm = 81.2768, GNorm = 0.5866, lr_0 = 9.7223e-04
Loss = 1.0973e-01, PNorm = 81.3883, GNorm = 0.9670, lr_0 = 9.7156e-04
Loss = 1.0639e-01, PNorm = 81.5009, GNorm = 0.6229, lr_0 = 9.7090e-04
Loss = 9.9074e-02, PNorm = 81.6006, GNorm = 0.6762, lr_0 = 9.7023e-04
Loss = 1.0524e-01, PNorm = 81.6972, GNorm = 0.4375, lr_0 = 9.6957e-04
Loss = 1.3213e-01, PNorm = 81.8046, GNorm = 0.8407, lr_0 = 9.6890e-04
Loss = 1.0622e-01, PNorm = 81.9075, GNorm = 0.5156, lr_0 = 9.6824e-04
Loss = 8.5175e-02, PNorm = 82.0035, GNorm = 0.6135, lr_0 = 9.6757e-04
Loss = 1.2292e-01, PNorm = 82.0945, GNorm = 1.0813, lr_0 = 9.6691e-04
Loss = 9.7415e-02, PNorm = 82.2064, GNorm = 0.6637, lr_0 = 9.6625e-04
Loss = 1.0802e-01, PNorm = 82.3027, GNorm = 0.5433, lr_0 = 9.6559e-04
Loss = 1.1322e-01, PNorm = 82.4139, GNorm = 0.6095, lr_0 = 9.6493e-04
Loss = 1.1953e-01, PNorm = 82.5201, GNorm = 1.0619, lr_0 = 9.6427e-04
Loss = 1.2738e-01, PNorm = 82.6357, GNorm = 0.7587, lr_0 = 9.6360e-04
Loss = 1.1921e-01, PNorm = 82.7575, GNorm = 0.4006, lr_0 = 9.6294e-04
Loss = 9.5354e-02, PNorm = 82.8753, GNorm = 0.5751, lr_0 = 9.6228e-04
Loss = 1.1233e-01, PNorm = 82.9684, GNorm = 1.2419, lr_0 = 9.6163e-04
Loss = 1.2868e-01, PNorm = 83.0729, GNorm = 0.7651, lr_0 = 9.6097e-04
Loss = 1.3115e-01, PNorm = 83.2034, GNorm = 0.5795, lr_0 = 9.6031e-04
Loss = 1.1727e-01, PNorm = 83.3153, GNorm = 0.5673, lr_0 = 9.5965e-04
Loss = 1.0455e-01, PNorm = 83.4345, GNorm = 0.4967, lr_0 = 9.5899e-04
Loss = 1.2255e-01, PNorm = 83.5398, GNorm = 0.5206, lr_0 = 9.5834e-04
Loss = 1.2667e-01, PNorm = 83.6468, GNorm = 0.6821, lr_0 = 9.5768e-04
Loss = 1.1575e-01, PNorm = 83.7629, GNorm = 0.8114, lr_0 = 9.5702e-04
Loss = 1.0578e-01, PNorm = 83.8683, GNorm = 0.6601, lr_0 = 9.5637e-04
Loss = 1.1615e-01, PNorm = 83.9792, GNorm = 0.6712, lr_0 = 9.5571e-04
Loss = 1.1162e-01, PNorm = 84.0873, GNorm = 0.5694, lr_0 = 9.5506e-04
Loss = 1.1072e-01, PNorm = 84.1944, GNorm = 0.6413, lr_0 = 9.5440e-04
Loss = 9.7824e-02, PNorm = 84.2909, GNorm = 0.6562, lr_0 = 9.5375e-04
Loss = 1.0935e-01, PNorm = 84.3956, GNorm = 0.5116, lr_0 = 9.5310e-04
Loss = 1.1409e-01, PNorm = 84.4879, GNorm = 0.6503, lr_0 = 9.5244e-04
Loss = 1.0635e-01, PNorm = 84.5999, GNorm = 0.6089, lr_0 = 9.5179e-04
Loss = 1.0634e-01, PNorm = 84.7010, GNorm = 0.4932, lr_0 = 9.5114e-04
Loss = 1.2211e-01, PNorm = 84.8033, GNorm = 0.9344, lr_0 = 9.5049e-04
Loss = 1.2833e-01, PNorm = 84.9202, GNorm = 0.4714, lr_0 = 9.4984e-04
Loss = 1.0595e-01, PNorm = 85.0520, GNorm = 0.6287, lr_0 = 9.4919e-04
Loss = 1.2348e-01, PNorm = 85.1716, GNorm = 0.5270, lr_0 = 9.4854e-04
Loss = 1.2719e-01, PNorm = 85.2853, GNorm = 0.7902, lr_0 = 9.4789e-04
Loss = 1.0847e-01, PNorm = 85.3947, GNorm = 1.0701, lr_0 = 9.4724e-04
Loss = 1.1800e-01, PNorm = 85.5090, GNorm = 0.6025, lr_0 = 9.4659e-04
Loss = 1.2320e-01, PNorm = 85.6251, GNorm = 0.4065, lr_0 = 9.4594e-04
Loss = 1.0410e-01, PNorm = 85.7297, GNorm = 0.6756, lr_0 = 9.4529e-04
Loss = 1.2501e-01, PNorm = 85.8339, GNorm = 0.5792, lr_0 = 9.4464e-04
Loss = 1.1009e-01, PNorm = 85.9366, GNorm = 0.5980, lr_0 = 9.4400e-04
Loss = 1.0938e-01, PNorm = 86.0466, GNorm = 0.8705, lr_0 = 9.4335e-04
Loss = 1.0535e-01, PNorm = 86.1550, GNorm = 0.7221, lr_0 = 9.4270e-04
Loss = 1.0322e-01, PNorm = 86.2650, GNorm = 0.5359, lr_0 = 9.4206e-04
Loss = 1.1233e-01, PNorm = 86.3758, GNorm = 1.0477, lr_0 = 9.4141e-04
Loss = 1.1672e-01, PNorm = 86.4737, GNorm = 0.4189, lr_0 = 9.4077e-04
Loss = 1.1796e-01, PNorm = 86.5751, GNorm = 0.5817, lr_0 = 9.4012e-04
Loss = 1.1172e-01, PNorm = 86.6728, GNorm = 0.6065, lr_0 = 9.3948e-04
Loss = 1.1235e-01, PNorm = 86.7717, GNorm = 0.9526, lr_0 = 9.3884e-04
Loss = 1.3045e-01, PNorm = 86.8785, GNorm = 1.0764, lr_0 = 9.3819e-04
Loss = 1.1120e-01, PNorm = 87.0041, GNorm = 0.8817, lr_0 = 9.3755e-04
Loss = 1.1782e-01, PNorm = 87.1034, GNorm = 0.5330, lr_0 = 9.3691e-04
Loss = 1.3557e-01, PNorm = 87.2149, GNorm = 0.5208, lr_0 = 9.3627e-04
Loss = 9.9838e-02, PNorm = 87.3264, GNorm = 0.6979, lr_0 = 9.3562e-04
Loss = 1.0722e-01, PNorm = 87.4360, GNorm = 0.8630, lr_0 = 9.3498e-04
Loss = 1.3455e-01, PNorm = 87.5425, GNorm = 0.5331, lr_0 = 9.3434e-04
Loss = 1.2131e-01, PNorm = 87.6625, GNorm = 0.9256, lr_0 = 9.3370e-04
Loss = 1.0214e-01, PNorm = 87.7737, GNorm = 0.4676, lr_0 = 9.3306e-04
Loss = 1.1187e-01, PNorm = 87.8915, GNorm = 0.4228, lr_0 = 9.3242e-04
Loss = 1.4244e-01, PNorm = 88.0005, GNorm = 0.9093, lr_0 = 9.3178e-04
Loss = 1.3224e-01, PNorm = 88.1197, GNorm = 0.9989, lr_0 = 9.3115e-04
Loss = 1.2360e-01, PNorm = 88.2503, GNorm = 0.4810, lr_0 = 9.3051e-04
Loss = 1.1450e-01, PNorm = 88.3663, GNorm = 0.7388, lr_0 = 9.2987e-04
Loss = 1.1776e-01, PNorm = 88.4744, GNorm = 0.7660, lr_0 = 9.2923e-04
Loss = 1.3993e-01, PNorm = 88.5876, GNorm = 0.4719, lr_0 = 9.2860e-04
Loss = 1.1431e-01, PNorm = 88.7058, GNorm = 0.9326, lr_0 = 9.2796e-04
Loss = 1.0902e-01, PNorm = 88.8176, GNorm = 1.1161, lr_0 = 9.2733e-04
Loss = 1.1750e-01, PNorm = 88.9309, GNorm = 0.5095, lr_0 = 9.2669e-04
Loss = 1.2015e-01, PNorm = 89.0329, GNorm = 1.2546, lr_0 = 9.2606e-04
Loss = 1.0222e-01, PNorm = 89.1366, GNorm = 0.4793, lr_0 = 9.2542e-04
Loss = 1.1547e-01, PNorm = 89.2316, GNorm = 0.5191, lr_0 = 9.2479e-04
Loss = 1.1424e-01, PNorm = 89.3309, GNorm = 0.6274, lr_0 = 9.2415e-04
Loss = 1.1699e-01, PNorm = 89.4419, GNorm = 0.9372, lr_0 = 9.2352e-04
Loss = 1.1809e-01, PNorm = 89.5522, GNorm = 0.5577, lr_0 = 9.2289e-04
Loss = 1.0349e-01, PNorm = 89.6553, GNorm = 0.5549, lr_0 = 9.2226e-04
Loss = 1.3343e-01, PNorm = 89.7651, GNorm = 0.7190, lr_0 = 9.2162e-04
Loss = 1.2387e-01, PNorm = 89.8886, GNorm = 1.2634, lr_0 = 9.2099e-04
Validation mae = 0.519010
Epoch 3
Loss = 8.3377e-02, PNorm = 89.9848, GNorm = 1.1275, lr_0 = 9.2036e-04
Loss = 7.1395e-02, PNorm = 90.0724, GNorm = 0.3903, lr_0 = 9.1973e-04
Loss = 6.9403e-02, PNorm = 90.1403, GNorm = 0.6970, lr_0 = 9.1910e-04
Loss = 6.9004e-02, PNorm = 90.1979, GNorm = 0.4462, lr_0 = 9.1847e-04
Loss = 7.3091e-02, PNorm = 90.2609, GNorm = 1.2892, lr_0 = 9.1784e-04
Loss = 7.5026e-02, PNorm = 90.3285, GNorm = 0.9220, lr_0 = 9.1721e-04
Loss = 6.3577e-02, PNorm = 90.3912, GNorm = 0.3873, lr_0 = 9.1658e-04
Loss = 6.8378e-02, PNorm = 90.4542, GNorm = 0.5137, lr_0 = 9.1596e-04
Loss = 6.7870e-02, PNorm = 90.5146, GNorm = 0.3610, lr_0 = 9.1533e-04
Loss = 6.4387e-02, PNorm = 90.5792, GNorm = 0.4455, lr_0 = 9.1470e-04
Loss = 7.0697e-02, PNorm = 90.6491, GNorm = 0.6622, lr_0 = 9.1408e-04
Loss = 6.0961e-02, PNorm = 90.7184, GNorm = 0.3808, lr_0 = 9.1345e-04
Loss = 6.2976e-02, PNorm = 90.7821, GNorm = 0.5940, lr_0 = 9.1282e-04
Loss = 6.7689e-02, PNorm = 90.8391, GNorm = 0.6553, lr_0 = 9.1220e-04
Loss = 5.8024e-02, PNorm = 90.9070, GNorm = 0.3438, lr_0 = 9.1157e-04
Loss = 6.5957e-02, PNorm = 90.9761, GNorm = 0.3500, lr_0 = 9.1095e-04
Loss = 6.3641e-02, PNorm = 91.0475, GNorm = 0.5883, lr_0 = 9.1032e-04
Loss = 6.7485e-02, PNorm = 91.1070, GNorm = 0.4108, lr_0 = 9.0970e-04
Loss = 5.8473e-02, PNorm = 91.1609, GNorm = 0.2969, lr_0 = 9.0908e-04
Loss = 8.0498e-02, PNorm = 91.2201, GNorm = 0.5025, lr_0 = 9.0846e-04
Loss = 7.1536e-02, PNorm = 91.2892, GNorm = 0.3158, lr_0 = 9.0783e-04
Loss = 7.0485e-02, PNorm = 91.3540, GNorm = 0.4840, lr_0 = 9.0721e-04
Loss = 6.1149e-02, PNorm = 91.4225, GNorm = 0.7578, lr_0 = 9.0659e-04
Loss = 7.3889e-02, PNorm = 91.5070, GNorm = 0.5198, lr_0 = 9.0597e-04
Loss = 6.3942e-02, PNorm = 91.5764, GNorm = 0.5507, lr_0 = 9.0535e-04
Loss = 7.3603e-02, PNorm = 91.6598, GNorm = 0.4333, lr_0 = 9.0473e-04
Loss = 7.4537e-02, PNorm = 91.7300, GNorm = 0.5219, lr_0 = 9.0411e-04
Loss = 6.9436e-02, PNorm = 91.8124, GNorm = 0.3681, lr_0 = 9.0349e-04
Loss = 7.3582e-02, PNorm = 91.8775, GNorm = 0.7552, lr_0 = 9.0287e-04
Loss = 6.9253e-02, PNorm = 91.9561, GNorm = 0.4858, lr_0 = 9.0225e-04
Loss = 7.4960e-02, PNorm = 92.0307, GNorm = 0.7972, lr_0 = 9.0163e-04
Loss = 6.8965e-02, PNorm = 92.1082, GNorm = 0.3695, lr_0 = 9.0102e-04
Loss = 6.7137e-02, PNorm = 92.1840, GNorm = 0.5679, lr_0 = 9.0040e-04
Loss = 6.6113e-02, PNorm = 92.2525, GNorm = 0.5386, lr_0 = 8.9978e-04
Loss = 7.4547e-02, PNorm = 92.3299, GNorm = 0.4690, lr_0 = 8.9916e-04
Loss = 7.4020e-02, PNorm = 92.4092, GNorm = 0.5952, lr_0 = 8.9855e-04
Loss = 7.1500e-02, PNorm = 92.4966, GNorm = 0.3617, lr_0 = 8.9793e-04
Loss = 6.2385e-02, PNorm = 92.5739, GNorm = 0.6742, lr_0 = 8.9732e-04
Loss = 6.1433e-02, PNorm = 92.6405, GNorm = 0.3626, lr_0 = 8.9670e-04
Loss = 6.6496e-02, PNorm = 92.7165, GNorm = 0.4980, lr_0 = 8.9609e-04
Loss = 7.6299e-02, PNorm = 92.7892, GNorm = 0.6442, lr_0 = 8.9548e-04
Loss = 7.2132e-02, PNorm = 92.8794, GNorm = 0.7705, lr_0 = 8.9486e-04
Loss = 7.0545e-02, PNorm = 92.9558, GNorm = 0.5153, lr_0 = 8.9425e-04
Loss = 6.7535e-02, PNorm = 93.0476, GNorm = 0.4485, lr_0 = 8.9364e-04
Loss = 6.6315e-02, PNorm = 93.1212, GNorm = 0.5036, lr_0 = 8.9302e-04
Loss = 7.3289e-02, PNorm = 93.2024, GNorm = 0.6223, lr_0 = 8.9241e-04
Loss = 5.9442e-02, PNorm = 93.2729, GNorm = 0.3398, lr_0 = 8.9180e-04
Loss = 6.5391e-02, PNorm = 93.3477, GNorm = 0.5912, lr_0 = 8.9119e-04
Loss = 6.4044e-02, PNorm = 93.4187, GNorm = 0.6986, lr_0 = 8.9058e-04
Loss = 7.6085e-02, PNorm = 93.4986, GNorm = 0.8630, lr_0 = 8.8997e-04
Loss = 5.5775e-02, PNorm = 93.5675, GNorm = 0.4348, lr_0 = 8.8936e-04
Loss = 7.2389e-02, PNorm = 93.6466, GNorm = 0.6127, lr_0 = 8.8875e-04
Loss = 7.5761e-02, PNorm = 93.7274, GNorm = 0.6842, lr_0 = 8.8814e-04
Loss = 7.2579e-02, PNorm = 93.8162, GNorm = 0.8600, lr_0 = 8.8753e-04
Loss = 6.1167e-02, PNorm = 93.8870, GNorm = 0.4060, lr_0 = 8.8693e-04
Loss = 6.4507e-02, PNorm = 93.9610, GNorm = 0.5178, lr_0 = 8.8632e-04
Loss = 8.1334e-02, PNorm = 94.0368, GNorm = 0.4403, lr_0 = 8.8571e-04
Loss = 7.1693e-02, PNorm = 94.1223, GNorm = 0.5942, lr_0 = 8.8510e-04
Loss = 6.8448e-02, PNorm = 94.2052, GNorm = 0.4404, lr_0 = 8.8450e-04
Loss = 6.7510e-02, PNorm = 94.2795, GNorm = 0.5078, lr_0 = 8.8389e-04
Loss = 5.8579e-02, PNorm = 94.3546, GNorm = 0.3880, lr_0 = 8.8329e-04
Loss = 7.3634e-02, PNorm = 94.4244, GNorm = 0.5829, lr_0 = 8.8268e-04
Loss = 7.6454e-02, PNorm = 94.5007, GNorm = 0.7930, lr_0 = 8.8208e-04
Loss = 6.8651e-02, PNorm = 94.5836, GNorm = 0.4910, lr_0 = 8.8147e-04
Loss = 7.4106e-02, PNorm = 94.6739, GNorm = 0.4748, lr_0 = 8.8087e-04
Loss = 6.9613e-02, PNorm = 94.7601, GNorm = 0.5657, lr_0 = 8.8026e-04
Loss = 7.3340e-02, PNorm = 94.8468, GNorm = 0.7109, lr_0 = 8.7966e-04
Loss = 7.7216e-02, PNorm = 94.9234, GNorm = 0.6460, lr_0 = 8.7906e-04
Loss = 7.5832e-02, PNorm = 95.0163, GNorm = 0.6194, lr_0 = 8.7846e-04
Loss = 6.6155e-02, PNorm = 95.0872, GNorm = 0.4456, lr_0 = 8.7785e-04
Loss = 8.1655e-02, PNorm = 95.1804, GNorm = 1.2414, lr_0 = 8.7725e-04
Loss = 7.5260e-02, PNorm = 95.2504, GNorm = 0.5878, lr_0 = 8.7665e-04
Loss = 6.3341e-02, PNorm = 95.3361, GNorm = 0.4991, lr_0 = 8.7605e-04
Loss = 6.2756e-02, PNorm = 95.4180, GNorm = 0.4940, lr_0 = 8.7545e-04
Loss = 7.1502e-02, PNorm = 95.4984, GNorm = 0.5719, lr_0 = 8.7485e-04
Loss = 6.9705e-02, PNorm = 95.5801, GNorm = 0.3698, lr_0 = 8.7425e-04
Loss = 6.6560e-02, PNorm = 95.6502, GNorm = 1.0654, lr_0 = 8.7365e-04
Loss = 6.4344e-02, PNorm = 95.7194, GNorm = 0.5394, lr_0 = 8.7306e-04
Loss = 8.0911e-02, PNorm = 95.8061, GNorm = 0.7032, lr_0 = 8.7246e-04
Loss = 6.9096e-02, PNorm = 95.8890, GNorm = 0.7113, lr_0 = 8.7186e-04
Loss = 7.7501e-02, PNorm = 95.9654, GNorm = 1.1607, lr_0 = 8.7126e-04
Loss = 7.4991e-02, PNorm = 96.0470, GNorm = 0.3433, lr_0 = 8.7067e-04
Loss = 7.1933e-02, PNorm = 96.1436, GNorm = 0.4186, lr_0 = 8.7007e-04
Loss = 6.6356e-02, PNorm = 96.2257, GNorm = 0.4802, lr_0 = 8.6947e-04
Loss = 7.9404e-02, PNorm = 96.3008, GNorm = 0.8149, lr_0 = 8.6888e-04
Loss = 8.0400e-02, PNorm = 96.3952, GNorm = 0.4273, lr_0 = 8.6828e-04
Loss = 6.6322e-02, PNorm = 96.4799, GNorm = 0.3308, lr_0 = 8.6769e-04
Loss = 7.7349e-02, PNorm = 96.5659, GNorm = 0.7237, lr_0 = 8.6709e-04
Loss = 8.3998e-02, PNorm = 96.6510, GNorm = 0.6242, lr_0 = 8.6650e-04
Loss = 8.3197e-02, PNorm = 96.7526, GNorm = 0.5223, lr_0 = 8.6590e-04
Loss = 7.9142e-02, PNorm = 96.8494, GNorm = 0.3764, lr_0 = 8.6531e-04
Loss = 7.3753e-02, PNorm = 96.9373, GNorm = 1.2369, lr_0 = 8.6472e-04
Loss = 8.2801e-02, PNorm = 97.0221, GNorm = 0.4835, lr_0 = 8.6413e-04
Loss = 8.3129e-02, PNorm = 97.1199, GNorm = 0.4731, lr_0 = 8.6353e-04
Loss = 8.2891e-02, PNorm = 97.2014, GNorm = 0.4456, lr_0 = 8.6294e-04
Loss = 7.1829e-02, PNorm = 97.2889, GNorm = 0.5807, lr_0 = 8.6235e-04
Loss = 9.1530e-02, PNorm = 97.3773, GNorm = 0.4110, lr_0 = 8.6176e-04
Loss = 6.8175e-02, PNorm = 97.4685, GNorm = 0.8005, lr_0 = 8.6117e-04
Loss = 7.5690e-02, PNorm = 97.5521, GNorm = 0.3449, lr_0 = 8.6058e-04
Loss = 7.2133e-02, PNorm = 97.6444, GNorm = 0.5951, lr_0 = 8.5999e-04
Loss = 7.9140e-02, PNorm = 97.7309, GNorm = 0.9154, lr_0 = 8.5940e-04
Loss = 8.0100e-02, PNorm = 97.8202, GNorm = 0.7784, lr_0 = 8.5881e-04
Loss = 7.6242e-02, PNorm = 97.9123, GNorm = 0.9077, lr_0 = 8.5823e-04
Loss = 7.5865e-02, PNorm = 98.0052, GNorm = 0.6472, lr_0 = 8.5764e-04
Loss = 7.8045e-02, PNorm = 98.0985, GNorm = 0.4976, lr_0 = 8.5705e-04
Loss = 7.6038e-02, PNorm = 98.1851, GNorm = 0.6496, lr_0 = 8.5646e-04
Loss = 6.8634e-02, PNorm = 98.2797, GNorm = 0.6203, lr_0 = 8.5588e-04
Loss = 7.1991e-02, PNorm = 98.3625, GNorm = 0.6542, lr_0 = 8.5529e-04
Loss = 8.5442e-02, PNorm = 98.4516, GNorm = 0.6288, lr_0 = 8.5470e-04
Loss = 8.0831e-02, PNorm = 98.5441, GNorm = 0.6794, lr_0 = 8.5412e-04
Loss = 7.8477e-02, PNorm = 98.6445, GNorm = 0.3639, lr_0 = 8.5353e-04
Loss = 7.5720e-02, PNorm = 98.7390, GNorm = 0.5198, lr_0 = 8.5295e-04
Loss = 7.8333e-02, PNorm = 98.8424, GNorm = 0.3258, lr_0 = 8.5236e-04
Loss = 8.7295e-02, PNorm = 98.9431, GNorm = 0.7157, lr_0 = 8.5178e-04
Loss = 6.8422e-02, PNorm = 99.0355, GNorm = 0.5259, lr_0 = 8.5120e-04
Loss = 9.1277e-02, PNorm = 99.1246, GNorm = 0.4506, lr_0 = 8.5061e-04
Loss = 7.6086e-02, PNorm = 99.2249, GNorm = 0.5658, lr_0 = 8.5003e-04
Loss = 7.9406e-02, PNorm = 99.3171, GNorm = 0.3469, lr_0 = 8.4945e-04
Loss = 9.0807e-02, PNorm = 99.4111, GNorm = 0.6594, lr_0 = 8.4887e-04
Loss = 7.6377e-02, PNorm = 99.5039, GNorm = 0.4272, lr_0 = 8.4828e-04
Validation mae = 0.505861
Epoch 4
Loss = 5.4316e-02, PNorm = 99.5834, GNorm = 0.3830, lr_0 = 8.4770e-04
Loss = 5.7074e-02, PNorm = 99.6573, GNorm = 0.4420, lr_0 = 8.4712e-04
Loss = 5.6806e-02, PNorm = 99.7203, GNorm = 0.4584, lr_0 = 8.4654e-04
Loss = 4.9455e-02, PNorm = 99.7817, GNorm = 0.6683, lr_0 = 8.4596e-04
Loss = 4.5602e-02, PNorm = 99.8471, GNorm = 0.7539, lr_0 = 8.4538e-04
Loss = 5.8533e-02, PNorm = 99.9066, GNorm = 0.5711, lr_0 = 8.4480e-04
Loss = 4.3640e-02, PNorm = 99.9730, GNorm = 0.4845, lr_0 = 8.4423e-04
Loss = 5.1854e-02, PNorm = 100.0309, GNorm = 0.2966, lr_0 = 8.4365e-04
Loss = 4.8587e-02, PNorm = 100.1013, GNorm = 0.4862, lr_0 = 8.4307e-04
Loss = 4.1039e-02, PNorm = 100.1604, GNorm = 0.2509, lr_0 = 8.4249e-04
Loss = 4.8989e-02, PNorm = 100.2127, GNorm = 0.2879, lr_0 = 8.4191e-04
Loss = 5.0788e-02, PNorm = 100.2744, GNorm = 0.3039, lr_0 = 8.4134e-04
Loss = 6.0433e-02, PNorm = 100.3293, GNorm = 0.2998, lr_0 = 8.4076e-04
Loss = 4.5875e-02, PNorm = 100.3879, GNorm = 1.0687, lr_0 = 8.4019e-04
Loss = 4.7900e-02, PNorm = 100.4452, GNorm = 0.3946, lr_0 = 8.3961e-04
Loss = 5.6864e-02, PNorm = 100.5138, GNorm = 0.5438, lr_0 = 8.3903e-04
Loss = 4.0350e-02, PNorm = 100.5804, GNorm = 0.8360, lr_0 = 8.3846e-04
Loss = 5.1557e-02, PNorm = 100.6314, GNorm = 0.3358, lr_0 = 8.3789e-04
Loss = 5.0218e-02, PNorm = 100.6850, GNorm = 0.3764, lr_0 = 8.3731e-04
Loss = 3.8661e-02, PNorm = 100.7339, GNorm = 0.2509, lr_0 = 8.3674e-04
Loss = 4.3282e-02, PNorm = 100.7813, GNorm = 0.2530, lr_0 = 8.3616e-04
Loss = 4.8024e-02, PNorm = 100.8365, GNorm = 0.3629, lr_0 = 8.3559e-04
Loss = 5.1712e-02, PNorm = 100.8895, GNorm = 0.6645, lr_0 = 8.3502e-04
Loss = 4.2994e-02, PNorm = 100.9478, GNorm = 0.2690, lr_0 = 8.3445e-04
Loss = 4.0628e-02, PNorm = 101.0018, GNorm = 0.3067, lr_0 = 8.3388e-04
Loss = 4.5847e-02, PNorm = 101.0537, GNorm = 0.5625, lr_0 = 8.3330e-04
Loss = 4.8322e-02, PNorm = 101.1160, GNorm = 0.2899, lr_0 = 8.3273e-04
Loss = 3.8130e-02, PNorm = 101.1750, GNorm = 0.3067, lr_0 = 8.3216e-04
Loss = 4.4240e-02, PNorm = 101.2279, GNorm = 0.3852, lr_0 = 8.3159e-04
Loss = 4.4225e-02, PNorm = 101.2799, GNorm = 0.3688, lr_0 = 8.3102e-04
Loss = 4.6448e-02, PNorm = 101.3360, GNorm = 0.5518, lr_0 = 8.3045e-04
Loss = 4.7888e-02, PNorm = 101.3994, GNorm = 0.3422, lr_0 = 8.2988e-04
Loss = 5.6257e-02, PNorm = 101.4600, GNorm = 0.6175, lr_0 = 8.2932e-04
Loss = 5.2014e-02, PNorm = 101.5209, GNorm = 0.4279, lr_0 = 8.2875e-04
Loss = 5.5918e-02, PNorm = 101.5925, GNorm = 0.7754, lr_0 = 8.2818e-04
Loss = 5.0276e-02, PNorm = 101.6515, GNorm = 0.4440, lr_0 = 8.2761e-04
Loss = 5.5104e-02, PNorm = 101.7241, GNorm = 0.4157, lr_0 = 8.2705e-04
Loss = 5.5785e-02, PNorm = 101.7796, GNorm = 1.0335, lr_0 = 8.2648e-04
Loss = 4.9268e-02, PNorm = 101.8484, GNorm = 0.8797, lr_0 = 8.2591e-04
Loss = 4.5136e-02, PNorm = 101.8976, GNorm = 0.8414, lr_0 = 8.2535e-04
Loss = 4.7571e-02, PNorm = 101.9651, GNorm = 0.2905, lr_0 = 8.2478e-04
Loss = 5.5720e-02, PNorm = 102.0234, GNorm = 0.4849, lr_0 = 8.2422e-04
Loss = 4.5997e-02, PNorm = 102.0939, GNorm = 0.4263, lr_0 = 8.2365e-04
Loss = 4.4548e-02, PNorm = 102.1620, GNorm = 0.2200, lr_0 = 8.2309e-04
Loss = 4.2632e-02, PNorm = 102.2234, GNorm = 0.4493, lr_0 = 8.2252e-04
Loss = 4.3201e-02, PNorm = 102.2865, GNorm = 0.2946, lr_0 = 8.2196e-04
Loss = 4.9291e-02, PNorm = 102.3414, GNorm = 0.5844, lr_0 = 8.2140e-04
Loss = 4.8379e-02, PNorm = 102.4023, GNorm = 0.3916, lr_0 = 8.2084e-04
Loss = 4.7785e-02, PNorm = 102.4669, GNorm = 0.5567, lr_0 = 8.2027e-04
Loss = 4.8985e-02, PNorm = 102.5312, GNorm = 0.4828, lr_0 = 8.1971e-04
Loss = 5.3267e-02, PNorm = 102.6013, GNorm = 0.6858, lr_0 = 8.1915e-04
Loss = 4.4906e-02, PNorm = 102.6767, GNorm = 0.2712, lr_0 = 8.1859e-04
Loss = 5.4922e-02, PNorm = 102.7466, GNorm = 0.9717, lr_0 = 8.1803e-04
Loss = 5.0339e-02, PNorm = 102.8163, GNorm = 0.4222, lr_0 = 8.1747e-04
Loss = 5.0248e-02, PNorm = 102.8791, GNorm = 0.3169, lr_0 = 8.1691e-04
Loss = 4.5520e-02, PNorm = 102.9351, GNorm = 0.3614, lr_0 = 8.1635e-04
Loss = 4.3026e-02, PNorm = 103.0026, GNorm = 0.6614, lr_0 = 8.1579e-04
Loss = 5.1892e-02, PNorm = 103.0671, GNorm = 0.5649, lr_0 = 8.1523e-04
Loss = 4.4503e-02, PNorm = 103.1321, GNorm = 0.3943, lr_0 = 8.1467e-04
Loss = 4.4640e-02, PNorm = 103.1950, GNorm = 0.4559, lr_0 = 8.1411e-04
Loss = 4.7234e-02, PNorm = 103.2517, GNorm = 0.5060, lr_0 = 8.1355e-04
Loss = 5.4169e-02, PNorm = 103.3212, GNorm = 0.4308, lr_0 = 8.1300e-04
Loss = 4.8911e-02, PNorm = 103.3890, GNorm = 0.5026, lr_0 = 8.1244e-04
Loss = 4.8107e-02, PNorm = 103.4614, GNorm = 0.4362, lr_0 = 8.1188e-04
Loss = 4.6276e-02, PNorm = 103.5342, GNorm = 0.4356, lr_0 = 8.1133e-04
Loss = 4.5696e-02, PNorm = 103.6030, GNorm = 0.4090, lr_0 = 8.1077e-04
Loss = 4.2786e-02, PNorm = 103.6701, GNorm = 0.5515, lr_0 = 8.1022e-04
Loss = 4.7547e-02, PNorm = 103.7372, GNorm = 0.5659, lr_0 = 8.0966e-04
Loss = 5.8318e-02, PNorm = 103.8144, GNorm = 0.5804, lr_0 = 8.0911e-04
Loss = 4.6084e-02, PNorm = 103.8834, GNorm = 0.5178, lr_0 = 8.0855e-04
Loss = 4.6902e-02, PNorm = 103.9544, GNorm = 0.3003, lr_0 = 8.0800e-04
Loss = 4.7044e-02, PNorm = 104.0262, GNorm = 0.3545, lr_0 = 8.0745e-04
Loss = 6.5086e-02, PNorm = 104.0797, GNorm = 0.4271, lr_0 = 8.0689e-04
Loss = 5.0851e-02, PNorm = 104.1496, GNorm = 0.6896, lr_0 = 8.0634e-04
Loss = 5.6010e-02, PNorm = 104.2251, GNorm = 0.3444, lr_0 = 8.0579e-04
Loss = 4.9716e-02, PNorm = 104.3096, GNorm = 0.3016, lr_0 = 8.0523e-04
Loss = 5.0125e-02, PNorm = 104.3799, GNorm = 1.2627, lr_0 = 8.0468e-04
Loss = 5.2127e-02, PNorm = 104.4635, GNorm = 0.9272, lr_0 = 8.0413e-04
Loss = 4.8714e-02, PNorm = 104.5364, GNorm = 0.3051, lr_0 = 8.0358e-04
Loss = 5.4343e-02, PNorm = 104.6198, GNorm = 0.5504, lr_0 = 8.0303e-04
Loss = 5.5221e-02, PNorm = 104.7064, GNorm = 0.4197, lr_0 = 8.0248e-04
Loss = 5.2910e-02, PNorm = 104.7814, GNorm = 0.3834, lr_0 = 8.0193e-04
Loss = 5.2539e-02, PNorm = 104.8608, GNorm = 0.5388, lr_0 = 8.0138e-04
Loss = 6.2700e-02, PNorm = 104.9428, GNorm = 0.3318, lr_0 = 8.0083e-04
Loss = 5.0710e-02, PNorm = 105.0266, GNorm = 0.3239, lr_0 = 8.0028e-04
Loss = 4.7456e-02, PNorm = 105.1089, GNorm = 0.4753, lr_0 = 7.9974e-04
Loss = 5.5910e-02, PNorm = 105.1824, GNorm = 0.5353, lr_0 = 7.9919e-04
Loss = 6.8114e-02, PNorm = 105.2782, GNorm = 1.3096, lr_0 = 7.9864e-04
Loss = 5.4668e-02, PNorm = 105.3570, GNorm = 0.6336, lr_0 = 7.9809e-04
Loss = 5.7962e-02, PNorm = 105.4415, GNorm = 0.2718, lr_0 = 7.9755e-04
Loss = 4.7302e-02, PNorm = 105.5197, GNorm = 0.3045, lr_0 = 7.9700e-04
Loss = 5.5408e-02, PNorm = 105.5988, GNorm = 0.3425, lr_0 = 7.9645e-04
Loss = 5.3778e-02, PNorm = 105.6789, GNorm = 0.3941, lr_0 = 7.9591e-04
Loss = 5.7081e-02, PNorm = 105.7547, GNorm = 0.8391, lr_0 = 7.9536e-04
Loss = 5.2139e-02, PNorm = 105.8412, GNorm = 0.2904, lr_0 = 7.9482e-04
Loss = 5.3124e-02, PNorm = 105.9252, GNorm = 0.4656, lr_0 = 7.9427e-04
Loss = 6.1932e-02, PNorm = 106.0161, GNorm = 0.4355, lr_0 = 7.9373e-04
Loss = 5.7479e-02, PNorm = 106.0977, GNorm = 0.4978, lr_0 = 7.9319e-04
Loss = 5.1040e-02, PNorm = 106.1947, GNorm = 0.4515, lr_0 = 7.9264e-04
Loss = 6.2969e-02, PNorm = 106.2801, GNorm = 0.3135, lr_0 = 7.9210e-04
Loss = 5.3818e-02, PNorm = 106.3658, GNorm = 0.5546, lr_0 = 7.9156e-04
Loss = 5.6874e-02, PNorm = 106.4451, GNorm = 0.4810, lr_0 = 7.9101e-04
Loss = 5.5000e-02, PNorm = 106.5237, GNorm = 0.5232, lr_0 = 7.9047e-04
Loss = 5.6576e-02, PNorm = 106.6031, GNorm = 0.4104, lr_0 = 7.8993e-04
Loss = 5.6256e-02, PNorm = 106.6760, GNorm = 0.3832, lr_0 = 7.8939e-04
Loss = 5.5862e-02, PNorm = 106.7659, GNorm = 0.3377, lr_0 = 7.8885e-04
Loss = 4.8481e-02, PNorm = 106.8522, GNorm = 0.3128, lr_0 = 7.8831e-04
Loss = 5.0387e-02, PNorm = 106.9346, GNorm = 0.3191, lr_0 = 7.8777e-04
Loss = 6.0300e-02, PNorm = 107.0054, GNorm = 0.6992, lr_0 = 7.8723e-04
Loss = 5.0263e-02, PNorm = 107.0874, GNorm = 0.6066, lr_0 = 7.8669e-04
Loss = 5.1094e-02, PNorm = 107.1659, GNorm = 0.2576, lr_0 = 7.8615e-04
Loss = 5.5980e-02, PNorm = 107.2443, GNorm = 0.5258, lr_0 = 7.8561e-04
Loss = 6.1012e-02, PNorm = 107.3311, GNorm = 0.6373, lr_0 = 7.8507e-04
Loss = 5.9325e-02, PNorm = 107.4160, GNorm = 0.3389, lr_0 = 7.8454e-04
Loss = 5.2317e-02, PNorm = 107.4910, GNorm = 0.3466, lr_0 = 7.8400e-04
Loss = 4.8426e-02, PNorm = 107.5648, GNorm = 0.7024, lr_0 = 7.8346e-04
Loss = 5.7934e-02, PNorm = 107.6416, GNorm = 0.3877, lr_0 = 7.8293e-04
Loss = 5.3511e-02, PNorm = 107.7232, GNorm = 0.5383, lr_0 = 7.8239e-04
Loss = 6.2207e-02, PNorm = 107.7990, GNorm = 0.7772, lr_0 = 7.8185e-04
Loss = 5.5066e-02, PNorm = 107.8934, GNorm = 0.3527, lr_0 = 7.8132e-04
Validation mae = 0.501431
Epoch 5
Loss = 3.6245e-02, PNorm = 107.9645, GNorm = 0.2771, lr_0 = 7.8078e-04
Loss = 3.9389e-02, PNorm = 108.0238, GNorm = 0.5385, lr_0 = 7.8025e-04
Loss = 4.2588e-02, PNorm = 108.0795, GNorm = 0.4518, lr_0 = 7.7971e-04
Loss = 3.9739e-02, PNorm = 108.1323, GNorm = 0.2909, lr_0 = 7.7918e-04
Loss = 3.9153e-02, PNorm = 108.1799, GNorm = 0.2859, lr_0 = 7.7864e-04
Loss = 3.7843e-02, PNorm = 108.2316, GNorm = 0.2974, lr_0 = 7.7811e-04
Loss = 3.6643e-02, PNorm = 108.2782, GNorm = 0.3204, lr_0 = 7.7758e-04
Loss = 3.9693e-02, PNorm = 108.3298, GNorm = 0.3902, lr_0 = 7.7705e-04
Loss = 3.8043e-02, PNorm = 108.3808, GNorm = 0.4136, lr_0 = 7.7651e-04
Loss = 3.5982e-02, PNorm = 108.4305, GNorm = 0.4101, lr_0 = 7.7598e-04
Loss = 3.6675e-02, PNorm = 108.4842, GNorm = 0.8600, lr_0 = 7.7545e-04
Loss = 3.9420e-02, PNorm = 108.5461, GNorm = 0.2834, lr_0 = 7.7492e-04
Loss = 3.2831e-02, PNorm = 108.5973, GNorm = 0.3854, lr_0 = 7.7439e-04
Loss = 3.8031e-02, PNorm = 108.6439, GNorm = 0.9630, lr_0 = 7.7386e-04
Loss = 3.7813e-02, PNorm = 108.6910, GNorm = 0.4510, lr_0 = 7.7333e-04
Loss = 3.4768e-02, PNorm = 108.7342, GNorm = 0.8625, lr_0 = 7.7280e-04
Loss = 4.0519e-02, PNorm = 108.7881, GNorm = 0.7005, lr_0 = 7.7227e-04
Loss = 3.3328e-02, PNorm = 108.8415, GNorm = 0.1925, lr_0 = 7.7174e-04
Loss = 3.6704e-02, PNorm = 108.8958, GNorm = 0.5110, lr_0 = 7.7121e-04
Loss = 3.2931e-02, PNorm = 108.9445, GNorm = 0.5498, lr_0 = 7.7068e-04
Loss = 3.3165e-02, PNorm = 108.9983, GNorm = 0.2500, lr_0 = 7.7015e-04
Loss = 3.3517e-02, PNorm = 109.0420, GNorm = 0.3869, lr_0 = 7.6963e-04
Loss = 3.0862e-02, PNorm = 109.0827, GNorm = 0.4558, lr_0 = 7.6910e-04
Loss = 4.0419e-02, PNorm = 109.1277, GNorm = 0.4263, lr_0 = 7.6857e-04
Loss = 3.8163e-02, PNorm = 109.1762, GNorm = 0.7287, lr_0 = 7.6805e-04
Loss = 3.6996e-02, PNorm = 109.2355, GNorm = 0.7662, lr_0 = 7.6752e-04
Loss = 4.1782e-02, PNorm = 109.2811, GNorm = 0.6264, lr_0 = 7.6699e-04
Loss = 3.7384e-02, PNorm = 109.3367, GNorm = 0.5280, lr_0 = 7.6647e-04
Loss = 3.0559e-02, PNorm = 109.3836, GNorm = 0.4309, lr_0 = 7.6594e-04
Loss = 3.2301e-02, PNorm = 109.4327, GNorm = 0.4321, lr_0 = 7.6542e-04
Loss = 3.6880e-02, PNorm = 109.4825, GNorm = 0.2264, lr_0 = 7.6489e-04
Loss = 4.0653e-02, PNorm = 109.5514, GNorm = 0.7060, lr_0 = 7.6437e-04
Loss = 3.0005e-02, PNorm = 109.6160, GNorm = 0.4888, lr_0 = 7.6385e-04
Loss = 3.5093e-02, PNorm = 109.6715, GNorm = 0.4710, lr_0 = 7.6332e-04
Loss = 3.3460e-02, PNorm = 109.7264, GNorm = 0.3852, lr_0 = 7.6280e-04
Loss = 3.7715e-02, PNorm = 109.7745, GNorm = 0.6849, lr_0 = 7.6228e-04
Loss = 3.3166e-02, PNorm = 109.8230, GNorm = 0.2385, lr_0 = 7.6176e-04
Loss = 3.4391e-02, PNorm = 109.8735, GNorm = 0.2537, lr_0 = 7.6123e-04
Loss = 3.2483e-02, PNorm = 109.9277, GNorm = 0.6380, lr_0 = 7.6071e-04
Loss = 3.8989e-02, PNorm = 109.9801, GNorm = 0.6640, lr_0 = 7.6019e-04
Loss = 4.0511e-02, PNorm = 110.0372, GNorm = 0.6106, lr_0 = 7.5967e-04
Loss = 4.0480e-02, PNorm = 110.1040, GNorm = 0.4576, lr_0 = 7.5915e-04
Loss = 3.6633e-02, PNorm = 110.1621, GNorm = 0.7174, lr_0 = 7.5863e-04
Loss = 3.6304e-02, PNorm = 110.2267, GNorm = 0.5554, lr_0 = 7.5811e-04
Loss = 3.5604e-02, PNorm = 110.2862, GNorm = 0.2547, lr_0 = 7.5759e-04
Loss = 4.0138e-02, PNorm = 110.3375, GNorm = 0.4089, lr_0 = 7.5707e-04
Loss = 3.7232e-02, PNorm = 110.3897, GNorm = 0.1723, lr_0 = 7.5655e-04
Loss = 3.6751e-02, PNorm = 110.4444, GNorm = 0.2850, lr_0 = 7.5603e-04
Loss = 4.1667e-02, PNorm = 110.5004, GNorm = 0.4646, lr_0 = 7.5552e-04
Loss = 3.5962e-02, PNorm = 110.5552, GNorm = 0.4214, lr_0 = 7.5500e-04
Loss = 3.7390e-02, PNorm = 110.6122, GNorm = 0.2747, lr_0 = 7.5448e-04
Loss = 3.2536e-02, PNorm = 110.6661, GNorm = 0.4423, lr_0 = 7.5397e-04
Loss = 3.4594e-02, PNorm = 110.7224, GNorm = 0.6300, lr_0 = 7.5345e-04
Loss = 3.5715e-02, PNorm = 110.7833, GNorm = 0.3101, lr_0 = 7.5293e-04
Loss = 3.7681e-02, PNorm = 110.8459, GNorm = 0.3128, lr_0 = 7.5242e-04
Loss = 3.4545e-02, PNorm = 110.9095, GNorm = 0.4488, lr_0 = 7.5190e-04
Loss = 3.5256e-02, PNorm = 110.9628, GNorm = 0.3811, lr_0 = 7.5139e-04
Loss = 3.8126e-02, PNorm = 111.0237, GNorm = 0.3818, lr_0 = 7.5087e-04
Loss = 3.8188e-02, PNorm = 111.0777, GNorm = 0.6264, lr_0 = 7.5036e-04
Loss = 3.7747e-02, PNorm = 111.1375, GNorm = 0.2967, lr_0 = 7.4984e-04
Loss = 3.8175e-02, PNorm = 111.1958, GNorm = 0.7836, lr_0 = 7.4933e-04
Loss = 4.1326e-02, PNorm = 111.2505, GNorm = 0.3313, lr_0 = 7.4882e-04
Loss = 3.1879e-02, PNorm = 111.3128, GNorm = 0.2141, lr_0 = 7.4830e-04
Loss = 3.6758e-02, PNorm = 111.3681, GNorm = 0.4497, lr_0 = 7.4779e-04
Loss = 3.6745e-02, PNorm = 111.4274, GNorm = 0.3806, lr_0 = 7.4728e-04
Loss = 3.6541e-02, PNorm = 111.4933, GNorm = 0.5817, lr_0 = 7.4677e-04
Loss = 3.5400e-02, PNorm = 111.5560, GNorm = 0.4350, lr_0 = 7.4625e-04
Loss = 3.3713e-02, PNorm = 111.6186, GNorm = 0.4328, lr_0 = 7.4574e-04
Loss = 4.0577e-02, PNorm = 111.6848, GNorm = 0.6677, lr_0 = 7.4523e-04
Loss = 3.2008e-02, PNorm = 111.7491, GNorm = 0.3260, lr_0 = 7.4472e-04
Loss = 3.4080e-02, PNorm = 111.8139, GNorm = 0.6406, lr_0 = 7.4421e-04
Loss = 3.3096e-02, PNorm = 111.8771, GNorm = 0.2706, lr_0 = 7.4370e-04
Loss = 3.7147e-02, PNorm = 111.9383, GNorm = 0.5215, lr_0 = 7.4319e-04
Loss = 3.6529e-02, PNorm = 111.9944, GNorm = 0.5786, lr_0 = 7.4268e-04
Loss = 3.5794e-02, PNorm = 112.0487, GNorm = 0.2105, lr_0 = 7.4217e-04
Loss = 3.5318e-02, PNorm = 112.1046, GNorm = 0.2960, lr_0 = 7.4167e-04
Loss = 3.7006e-02, PNorm = 112.1660, GNorm = 0.3499, lr_0 = 7.4116e-04
Loss = 3.9413e-02, PNorm = 112.2235, GNorm = 0.7374, lr_0 = 7.4065e-04
Loss = 3.4451e-02, PNorm = 112.2883, GNorm = 0.5069, lr_0 = 7.4014e-04
Loss = 3.6161e-02, PNorm = 112.3548, GNorm = 0.3171, lr_0 = 7.3964e-04
Loss = 4.2784e-02, PNorm = 112.4152, GNorm = 0.7236, lr_0 = 7.3913e-04
Loss = 4.6730e-02, PNorm = 112.4819, GNorm = 0.5838, lr_0 = 7.3862e-04
Loss = 3.7588e-02, PNorm = 112.5484, GNorm = 0.3894, lr_0 = 7.3812e-04
Loss = 3.6022e-02, PNorm = 112.6155, GNorm = 0.4363, lr_0 = 7.3761e-04
Loss = 3.6259e-02, PNorm = 112.6713, GNorm = 0.5196, lr_0 = 7.3711e-04
Loss = 3.9218e-02, PNorm = 112.7282, GNorm = 0.4093, lr_0 = 7.3660e-04
Loss = 3.8457e-02, PNorm = 112.7926, GNorm = 0.4076, lr_0 = 7.3610e-04
Loss = 3.8036e-02, PNorm = 112.8580, GNorm = 0.4949, lr_0 = 7.3559e-04
Loss = 4.3890e-02, PNorm = 112.9237, GNorm = 0.5600, lr_0 = 7.3509e-04
Loss = 4.3756e-02, PNorm = 112.9928, GNorm = 0.2780, lr_0 = 7.3458e-04
Loss = 3.9125e-02, PNorm = 113.0678, GNorm = 0.3790, lr_0 = 7.3408e-04
Loss = 4.1331e-02, PNorm = 113.1422, GNorm = 0.5021, lr_0 = 7.3358e-04
Loss = 4.0291e-02, PNorm = 113.2084, GNorm = 0.7114, lr_0 = 7.3308e-04
Loss = 3.5317e-02, PNorm = 113.2757, GNorm = 0.3232, lr_0 = 7.3257e-04
Loss = 3.7914e-02, PNorm = 113.3414, GNorm = 0.4859, lr_0 = 7.3207e-04
Loss = 4.2091e-02, PNorm = 113.4146, GNorm = 0.5695, lr_0 = 7.3157e-04
Loss = 3.4835e-02, PNorm = 113.4806, GNorm = 0.3333, lr_0 = 7.3107e-04
Loss = 3.4076e-02, PNorm = 113.5416, GNorm = 0.2562, lr_0 = 7.3057e-04
Loss = 3.9563e-02, PNorm = 113.6059, GNorm = 0.3212, lr_0 = 7.3007e-04
Loss = 4.1826e-02, PNorm = 113.6693, GNorm = 0.6166, lr_0 = 7.2957e-04
Loss = 3.9809e-02, PNorm = 113.7410, GNorm = 0.4470, lr_0 = 7.2907e-04
Loss = 3.5988e-02, PNorm = 113.8158, GNorm = 0.4110, lr_0 = 7.2857e-04
Loss = 4.0468e-02, PNorm = 113.8965, GNorm = 0.5494, lr_0 = 7.2807e-04
Loss = 3.7783e-02, PNorm = 113.9755, GNorm = 0.6338, lr_0 = 7.2757e-04
Loss = 3.9816e-02, PNorm = 114.0533, GNorm = 0.3406, lr_0 = 7.2707e-04
Loss = 3.4853e-02, PNorm = 114.1270, GNorm = 0.3451, lr_0 = 7.2657e-04
Loss = 3.8446e-02, PNorm = 114.1979, GNorm = 0.5352, lr_0 = 7.2608e-04
Loss = 3.9721e-02, PNorm = 114.2720, GNorm = 0.3657, lr_0 = 7.2558e-04
Loss = 3.8690e-02, PNorm = 114.3393, GNorm = 0.3722, lr_0 = 7.2508e-04
Loss = 3.5942e-02, PNorm = 114.4164, GNorm = 0.3653, lr_0 = 7.2458e-04
Loss = 4.2188e-02, PNorm = 114.4853, GNorm = 0.4000, lr_0 = 7.2409e-04
Loss = 4.0194e-02, PNorm = 114.5593, GNorm = 0.2885, lr_0 = 7.2359e-04
Loss = 4.4348e-02, PNorm = 114.6216, GNorm = 0.5360, lr_0 = 7.2310e-04
Loss = 4.3204e-02, PNorm = 114.6872, GNorm = 0.3535, lr_0 = 7.2260e-04
Loss = 4.4142e-02, PNorm = 114.7667, GNorm = 0.3523, lr_0 = 7.2211e-04
Loss = 4.5691e-02, PNorm = 114.8412, GNorm = 0.8958, lr_0 = 7.2161e-04
Loss = 3.7699e-02, PNorm = 114.9188, GNorm = 0.2440, lr_0 = 7.2112e-04
Loss = 4.2955e-02, PNorm = 114.9885, GNorm = 1.0825, lr_0 = 7.2062e-04
Loss = 3.7226e-02, PNorm = 115.0600, GNorm = 0.3279, lr_0 = 7.2013e-04
Loss = 4.2311e-02, PNorm = 115.1236, GNorm = 0.6739, lr_0 = 7.1964e-04
Validation mae = 0.499010
Epoch 6
Loss = 3.1216e-02, PNorm = 115.1787, GNorm = 0.3848, lr_0 = 7.1914e-04
Loss = 3.3505e-02, PNorm = 115.2253, GNorm = 0.3839, lr_0 = 7.1865e-04
Loss = 3.9667e-02, PNorm = 115.2644, GNorm = 0.3268, lr_0 = 7.1816e-04
Loss = 2.7482e-02, PNorm = 115.3083, GNorm = 0.1633, lr_0 = 7.1767e-04
Loss = 3.1511e-02, PNorm = 115.3566, GNorm = 0.3539, lr_0 = 7.1717e-04
Loss = 2.5613e-02, PNorm = 115.4106, GNorm = 0.3983, lr_0 = 7.1668e-04
Loss = 2.7931e-02, PNorm = 115.4585, GNorm = 0.4269, lr_0 = 7.1619e-04
Loss = 2.6174e-02, PNorm = 115.5040, GNorm = 0.2903, lr_0 = 7.1570e-04
Loss = 2.5757e-02, PNorm = 115.5439, GNorm = 0.6432, lr_0 = 7.1521e-04
Loss = 2.6861e-02, PNorm = 115.5848, GNorm = 0.8444, lr_0 = 7.1472e-04
Loss = 2.5231e-02, PNorm = 115.6280, GNorm = 0.3877, lr_0 = 7.1423e-04
Loss = 2.8941e-02, PNorm = 115.6718, GNorm = 0.4070, lr_0 = 7.1374e-04
Loss = 2.5974e-02, PNorm = 115.7187, GNorm = 0.4826, lr_0 = 7.1325e-04
Loss = 3.0196e-02, PNorm = 115.7570, GNorm = 0.5747, lr_0 = 7.1277e-04
Loss = 2.5772e-02, PNorm = 115.8033, GNorm = 0.3588, lr_0 = 7.1228e-04
Loss = 3.0380e-02, PNorm = 115.8447, GNorm = 0.2709, lr_0 = 7.1179e-04
Loss = 2.9786e-02, PNorm = 115.8868, GNorm = 0.5997, lr_0 = 7.1130e-04
Loss = 2.6045e-02, PNorm = 115.9364, GNorm = 0.4250, lr_0 = 7.1081e-04
Loss = 2.6434e-02, PNorm = 115.9825, GNorm = 0.2404, lr_0 = 7.1033e-04
Loss = 2.7926e-02, PNorm = 116.0290, GNorm = 0.5186, lr_0 = 7.0984e-04
Loss = 2.2142e-02, PNorm = 116.0725, GNorm = 0.4636, lr_0 = 7.0935e-04
Loss = 2.6424e-02, PNorm = 116.1159, GNorm = 0.6933, lr_0 = 7.0887e-04
Loss = 2.7939e-02, PNorm = 116.1553, GNorm = 0.3441, lr_0 = 7.0838e-04
Loss = 2.7494e-02, PNorm = 116.1978, GNorm = 0.3363, lr_0 = 7.0790e-04
Loss = 2.6158e-02, PNorm = 116.2447, GNorm = 0.3518, lr_0 = 7.0741e-04
Loss = 2.5861e-02, PNorm = 116.2870, GNorm = 0.5532, lr_0 = 7.0693e-04
Loss = 2.7322e-02, PNorm = 116.3314, GNorm = 0.3743, lr_0 = 7.0644e-04
Loss = 3.0629e-02, PNorm = 116.3798, GNorm = 0.5607, lr_0 = 7.0596e-04
Loss = 2.4196e-02, PNorm = 116.4245, GNorm = 0.3238, lr_0 = 7.0548e-04
Loss = 2.6338e-02, PNorm = 116.4685, GNorm = 0.3136, lr_0 = 7.0499e-04
Loss = 2.2613e-02, PNorm = 116.5162, GNorm = 0.2404, lr_0 = 7.0451e-04
Loss = 2.8670e-02, PNorm = 116.5600, GNorm = 0.1827, lr_0 = 7.0403e-04
Loss = 3.0028e-02, PNorm = 116.6003, GNorm = 0.5096, lr_0 = 7.0354e-04
Loss = 3.4844e-02, PNorm = 116.6416, GNorm = 0.2032, lr_0 = 7.0306e-04
Loss = 3.0674e-02, PNorm = 116.6860, GNorm = 0.4229, lr_0 = 7.0258e-04
Loss = 3.0561e-02, PNorm = 116.7306, GNorm = 0.2013, lr_0 = 7.0210e-04
Loss = 3.6343e-02, PNorm = 116.7728, GNorm = 0.2682, lr_0 = 7.0162e-04
Loss = 2.7267e-02, PNorm = 116.8202, GNorm = 0.2526, lr_0 = 7.0114e-04
Loss = 2.5862e-02, PNorm = 116.8705, GNorm = 0.2124, lr_0 = 7.0066e-04
Loss = 2.7237e-02, PNorm = 116.9236, GNorm = 0.3303, lr_0 = 7.0018e-04
Loss = 2.9509e-02, PNorm = 116.9739, GNorm = 0.4894, lr_0 = 6.9970e-04
Loss = 2.5094e-02, PNorm = 117.0190, GNorm = 0.3178, lr_0 = 6.9922e-04
Loss = 2.7393e-02, PNorm = 117.0640, GNorm = 0.1952, lr_0 = 6.9874e-04
Loss = 2.7847e-02, PNorm = 117.1195, GNorm = 0.2171, lr_0 = 6.9826e-04
Loss = 2.9244e-02, PNorm = 117.1739, GNorm = 0.4275, lr_0 = 6.9778e-04
Loss = 2.8036e-02, PNorm = 117.2227, GNorm = 0.3403, lr_0 = 6.9730e-04
Loss = 2.4961e-02, PNorm = 117.2661, GNorm = 0.1653, lr_0 = 6.9683e-04
Loss = 2.6959e-02, PNorm = 117.3161, GNorm = 0.2971, lr_0 = 6.9635e-04
Loss = 2.9910e-02, PNorm = 117.3638, GNorm = 0.4765, lr_0 = 6.9587e-04
Loss = 2.5934e-02, PNorm = 117.4150, GNorm = 0.4927, lr_0 = 6.9540e-04
Loss = 2.5554e-02, PNorm = 117.4637, GNorm = 0.2103, lr_0 = 6.9492e-04
Loss = 3.1093e-02, PNorm = 117.5100, GNorm = 0.2689, lr_0 = 6.9444e-04
Loss = 3.2254e-02, PNorm = 117.5605, GNorm = 0.2913, lr_0 = 6.9397e-04
Loss = 3.0005e-02, PNorm = 117.6136, GNorm = 0.6097, lr_0 = 6.9349e-04
Loss = 2.8744e-02, PNorm = 117.6734, GNorm = 0.2158, lr_0 = 6.9302e-04
Loss = 2.6893e-02, PNorm = 117.7268, GNorm = 0.2012, lr_0 = 6.9254e-04
Loss = 2.6747e-02, PNorm = 117.7837, GNorm = 0.4830, lr_0 = 6.9207e-04
Loss = 2.6599e-02, PNorm = 117.8344, GNorm = 0.2745, lr_0 = 6.9159e-04
Loss = 2.6382e-02, PNorm = 117.8872, GNorm = 0.7453, lr_0 = 6.9112e-04
Loss = 2.8375e-02, PNorm = 117.9321, GNorm = 0.2941, lr_0 = 6.9065e-04
Loss = 2.9528e-02, PNorm = 117.9839, GNorm = 0.2911, lr_0 = 6.9017e-04
Loss = 2.6367e-02, PNorm = 118.0295, GNorm = 0.2607, lr_0 = 6.8970e-04
Loss = 2.8929e-02, PNorm = 118.0843, GNorm = 0.5980, lr_0 = 6.8923e-04
Loss = 3.0986e-02, PNorm = 118.1325, GNorm = 0.5408, lr_0 = 6.8876e-04
Loss = 3.0773e-02, PNorm = 118.1929, GNorm = 0.2083, lr_0 = 6.8828e-04
Loss = 2.8150e-02, PNorm = 118.2512, GNorm = 0.3701, lr_0 = 6.8781e-04
Loss = 3.0298e-02, PNorm = 118.2959, GNorm = 0.3220, lr_0 = 6.8734e-04
Loss = 2.9160e-02, PNorm = 118.3526, GNorm = 0.1881, lr_0 = 6.8687e-04
Loss = 3.5555e-02, PNorm = 118.4172, GNorm = 0.8325, lr_0 = 6.8640e-04
Loss = 2.5825e-02, PNorm = 118.4757, GNorm = 0.2187, lr_0 = 6.8593e-04
Loss = 3.0004e-02, PNorm = 118.5308, GNorm = 0.4098, lr_0 = 6.8546e-04
Loss = 2.6914e-02, PNorm = 118.5826, GNorm = 0.3259, lr_0 = 6.8499e-04
Loss = 3.0182e-02, PNorm = 118.6357, GNorm = 0.3611, lr_0 = 6.8452e-04
Loss = 2.8980e-02, PNorm = 118.6932, GNorm = 0.4824, lr_0 = 6.8405e-04
Loss = 3.2044e-02, PNorm = 118.7516, GNorm = 0.5287, lr_0 = 6.8358e-04
Loss = 2.7004e-02, PNorm = 118.8088, GNorm = 0.1821, lr_0 = 6.8312e-04
Loss = 2.8534e-02, PNorm = 118.8606, GNorm = 0.2856, lr_0 = 6.8265e-04
Loss = 3.0033e-02, PNorm = 118.9074, GNorm = 0.2020, lr_0 = 6.8218e-04
Loss = 2.5783e-02, PNorm = 118.9622, GNorm = 0.3529, lr_0 = 6.8171e-04
Loss = 2.7819e-02, PNorm = 119.0124, GNorm = 0.3879, lr_0 = 6.8125e-04
Loss = 2.5395e-02, PNorm = 119.0700, GNorm = 0.5852, lr_0 = 6.8078e-04
Loss = 3.3221e-02, PNorm = 119.1292, GNorm = 0.2412, lr_0 = 6.8031e-04
Loss = 3.0085e-02, PNorm = 119.1860, GNorm = 0.3123, lr_0 = 6.7985e-04
Loss = 3.6276e-02, PNorm = 119.2561, GNorm = 0.3226, lr_0 = 6.7938e-04
Loss = 3.2743e-02, PNorm = 119.3183, GNorm = 0.8670, lr_0 = 6.7892e-04
Loss = 2.9194e-02, PNorm = 119.3715, GNorm = 0.2964, lr_0 = 6.7845e-04
Loss = 3.1722e-02, PNorm = 119.4326, GNorm = 0.5821, lr_0 = 6.7799e-04
Loss = 3.0549e-02, PNorm = 119.4875, GNorm = 0.4899, lr_0 = 6.7752e-04
Loss = 3.0170e-02, PNorm = 119.5494, GNorm = 0.7321, lr_0 = 6.7706e-04
Loss = 2.7028e-02, PNorm = 119.6071, GNorm = 0.3262, lr_0 = 6.7659e-04
Loss = 3.3183e-02, PNorm = 119.6565, GNorm = 0.5978, lr_0 = 6.7613e-04
Loss = 2.8325e-02, PNorm = 119.7182, GNorm = 0.5723, lr_0 = 6.7567e-04
Loss = 2.8112e-02, PNorm = 119.7755, GNorm = 0.5752, lr_0 = 6.7520e-04
Loss = 3.0873e-02, PNorm = 119.8360, GNorm = 0.4684, lr_0 = 6.7474e-04
Loss = 2.6707e-02, PNorm = 119.8939, GNorm = 0.3816, lr_0 = 6.7428e-04
Loss = 2.9982e-02, PNorm = 119.9460, GNorm = 0.5298, lr_0 = 6.7382e-04
Loss = 2.7473e-02, PNorm = 119.9972, GNorm = 0.6378, lr_0 = 6.7335e-04
Loss = 3.0306e-02, PNorm = 120.0539, GNorm = 0.5531, lr_0 = 6.7289e-04
Loss = 3.0748e-02, PNorm = 120.1153, GNorm = 0.5798, lr_0 = 6.7243e-04
Loss = 3.4618e-02, PNorm = 120.1701, GNorm = 0.4901, lr_0 = 6.7197e-04
Loss = 3.0505e-02, PNorm = 120.2361, GNorm = 0.6580, lr_0 = 6.7151e-04
Loss = 3.2308e-02, PNorm = 120.3068, GNorm = 0.2420, lr_0 = 6.7105e-04
Loss = 3.2022e-02, PNorm = 120.3824, GNorm = 0.2306, lr_0 = 6.7059e-04
Loss = 3.1332e-02, PNorm = 120.4451, GNorm = 0.4600, lr_0 = 6.7013e-04
Loss = 3.1711e-02, PNorm = 120.5118, GNorm = 0.5742, lr_0 = 6.6967e-04
Loss = 3.2804e-02, PNorm = 120.5746, GNorm = 0.7041, lr_0 = 6.6921e-04
Loss = 3.2557e-02, PNorm = 120.6381, GNorm = 0.5671, lr_0 = 6.6876e-04
Loss = 3.2372e-02, PNorm = 120.7016, GNorm = 0.6322, lr_0 = 6.6830e-04
Loss = 2.9456e-02, PNorm = 120.7676, GNorm = 0.2786, lr_0 = 6.6784e-04
Loss = 2.5824e-02, PNorm = 120.8306, GNorm = 0.2242, lr_0 = 6.6738e-04
Loss = 3.0952e-02, PNorm = 120.8895, GNorm = 0.4294, lr_0 = 6.6693e-04
Loss = 2.4988e-02, PNorm = 120.9414, GNorm = 0.4969, lr_0 = 6.6647e-04
Loss = 2.8002e-02, PNorm = 120.9950, GNorm = 0.3464, lr_0 = 6.6601e-04
Loss = 3.2178e-02, PNorm = 121.0511, GNorm = 0.6611, lr_0 = 6.6556e-04
Loss = 3.3526e-02, PNorm = 121.1194, GNorm = 0.8983, lr_0 = 6.6510e-04
Loss = 3.0324e-02, PNorm = 121.1785, GNorm = 0.2737, lr_0 = 6.6464e-04
Loss = 3.8388e-02, PNorm = 121.2362, GNorm = 0.5131, lr_0 = 6.6419e-04
Loss = 2.9382e-02, PNorm = 121.3020, GNorm = 0.4230, lr_0 = 6.6373e-04
Loss = 3.1085e-02, PNorm = 121.3670, GNorm = 1.0300, lr_0 = 6.6328e-04
Loss = 2.7663e-02, PNorm = 121.4349, GNorm = 0.2258, lr_0 = 6.6282e-04
Validation mae = 0.490964
Epoch 7
Loss = 2.6605e-02, PNorm = 121.4866, GNorm = 0.4336, lr_0 = 6.6237e-04
Loss = 2.7016e-02, PNorm = 121.5322, GNorm = 0.3177, lr_0 = 6.6192e-04
Loss = 2.3602e-02, PNorm = 121.5685, GNorm = 0.3150, lr_0 = 6.6146e-04
Loss = 2.5421e-02, PNorm = 121.6080, GNorm = 0.4509, lr_0 = 6.6101e-04
Loss = 2.3001e-02, PNorm = 121.6434, GNorm = 0.4146, lr_0 = 6.6056e-04
Loss = 2.4106e-02, PNorm = 121.6857, GNorm = 0.6096, lr_0 = 6.6011e-04
Loss = 2.2513e-02, PNorm = 121.7237, GNorm = 0.3655, lr_0 = 6.5965e-04
Loss = 2.2005e-02, PNorm = 121.7679, GNorm = 0.2872, lr_0 = 6.5920e-04
Loss = 2.4178e-02, PNorm = 121.8052, GNorm = 0.4712, lr_0 = 6.5875e-04
Loss = 2.3694e-02, PNorm = 121.8392, GNorm = 0.2405, lr_0 = 6.5830e-04
Loss = 2.2698e-02, PNorm = 121.8757, GNorm = 0.3872, lr_0 = 6.5785e-04
Loss = 2.2617e-02, PNorm = 121.9144, GNorm = 0.4338, lr_0 = 6.5740e-04
Loss = 2.1414e-02, PNorm = 121.9542, GNorm = 0.2675, lr_0 = 6.5695e-04
Loss = 2.6124e-02, PNorm = 121.9841, GNorm = 0.3160, lr_0 = 6.5650e-04
Loss = 2.4398e-02, PNorm = 122.0292, GNorm = 0.2129, lr_0 = 6.5605e-04
Loss = 2.4344e-02, PNorm = 122.0777, GNorm = 0.3100, lr_0 = 6.5560e-04
Loss = 2.3796e-02, PNorm = 122.1294, GNorm = 0.1917, lr_0 = 6.5515e-04
Loss = 2.5357e-02, PNorm = 122.1665, GNorm = 0.3042, lr_0 = 6.5470e-04
Loss = 2.0643e-02, PNorm = 122.2045, GNorm = 0.3393, lr_0 = 6.5425e-04
Loss = 2.0122e-02, PNorm = 122.2440, GNorm = 0.2843, lr_0 = 6.5380e-04
Loss = 1.7397e-02, PNorm = 122.2807, GNorm = 0.3928, lr_0 = 6.5335e-04
Loss = 1.8890e-02, PNorm = 122.3127, GNorm = 0.4760, lr_0 = 6.5291e-04
Loss = 2.0061e-02, PNorm = 122.3453, GNorm = 0.1511, lr_0 = 6.5246e-04
Loss = 2.0089e-02, PNorm = 122.3845, GNorm = 0.2086, lr_0 = 6.5201e-04
Loss = 2.2024e-02, PNorm = 122.4130, GNorm = 0.7672, lr_0 = 6.5157e-04
Loss = 2.0169e-02, PNorm = 122.4497, GNorm = 0.3899, lr_0 = 6.5112e-04
Loss = 2.8951e-02, PNorm = 122.4936, GNorm = 0.9816, lr_0 = 6.5067e-04
Loss = 2.4774e-02, PNorm = 122.5410, GNorm = 0.6449, lr_0 = 6.5023e-04
Loss = 2.3124e-02, PNorm = 122.5796, GNorm = 0.2359, lr_0 = 6.4978e-04
Loss = 2.2547e-02, PNorm = 122.6259, GNorm = 0.6291, lr_0 = 6.4934e-04
Loss = 2.4280e-02, PNorm = 122.6673, GNorm = 0.3596, lr_0 = 6.4889e-04
Loss = 2.3726e-02, PNorm = 122.7181, GNorm = 0.3645, lr_0 = 6.4845e-04
Loss = 1.9326e-02, PNorm = 122.7597, GNorm = 0.3339, lr_0 = 6.4800e-04
Loss = 2.2130e-02, PNorm = 122.8086, GNorm = 0.3886, lr_0 = 6.4756e-04
Loss = 2.1053e-02, PNorm = 122.8555, GNorm = 0.4169, lr_0 = 6.4712e-04
Loss = 2.3924e-02, PNorm = 122.8980, GNorm = 0.1248, lr_0 = 6.4667e-04
Loss = 2.3775e-02, PNorm = 122.9387, GNorm = 0.4978, lr_0 = 6.4623e-04
Loss = 2.2999e-02, PNorm = 122.9772, GNorm = 0.2054, lr_0 = 6.4579e-04
Loss = 2.1710e-02, PNorm = 123.0231, GNorm = 0.3107, lr_0 = 6.4534e-04
Loss = 2.1155e-02, PNorm = 123.0625, GNorm = 0.4914, lr_0 = 6.4490e-04
Loss = 2.2797e-02, PNorm = 123.1048, GNorm = 0.4025, lr_0 = 6.4446e-04
Loss = 1.8664e-02, PNorm = 123.1494, GNorm = 0.4277, lr_0 = 6.4402e-04
Loss = 2.4303e-02, PNorm = 123.1927, GNorm = 0.5143, lr_0 = 6.4358e-04
Loss = 2.1778e-02, PNorm = 123.2299, GNorm = 0.4385, lr_0 = 6.4314e-04
Loss = 2.3166e-02, PNorm = 123.2715, GNorm = 0.9661, lr_0 = 6.4270e-04
Loss = 2.1998e-02, PNorm = 123.3120, GNorm = 0.4282, lr_0 = 6.4226e-04
Loss = 2.4434e-02, PNorm = 123.3582, GNorm = 0.2690, lr_0 = 6.4182e-04
Loss = 2.1275e-02, PNorm = 123.4056, GNorm = 0.3338, lr_0 = 6.4138e-04
Loss = 2.2925e-02, PNorm = 123.4475, GNorm = 0.6791, lr_0 = 6.4094e-04
Loss = 2.7041e-02, PNorm = 123.4859, GNorm = 0.5275, lr_0 = 6.4050e-04
Loss = 2.1965e-02, PNorm = 123.5355, GNorm = 0.2034, lr_0 = 6.4006e-04
Loss = 2.1607e-02, PNorm = 123.5836, GNorm = 0.3459, lr_0 = 6.3962e-04
Loss = 2.5514e-02, PNorm = 123.6299, GNorm = 0.3165, lr_0 = 6.3918e-04
Loss = 2.0137e-02, PNorm = 123.6751, GNorm = 0.1772, lr_0 = 6.3874e-04
Loss = 2.6238e-02, PNorm = 123.7227, GNorm = 0.5468, lr_0 = 6.3831e-04
Loss = 2.2333e-02, PNorm = 123.7725, GNorm = 0.3019, lr_0 = 6.3787e-04
Loss = 2.3991e-02, PNorm = 123.8207, GNorm = 0.2945, lr_0 = 6.3743e-04
Loss = 2.7410e-02, PNorm = 123.8698, GNorm = 0.4429, lr_0 = 6.3700e-04
Loss = 3.0399e-02, PNorm = 123.9128, GNorm = 0.6356, lr_0 = 6.3656e-04
Loss = 2.5634e-02, PNorm = 123.9596, GNorm = 0.2361, lr_0 = 6.3612e-04
Loss = 2.1525e-02, PNorm = 124.0065, GNorm = 0.1921, lr_0 = 6.3569e-04
Loss = 2.0181e-02, PNorm = 124.0493, GNorm = 0.5483, lr_0 = 6.3525e-04
Loss = 2.4309e-02, PNorm = 124.0923, GNorm = 0.6201, lr_0 = 6.3482e-04
Loss = 2.5186e-02, PNorm = 124.1396, GNorm = 0.5736, lr_0 = 6.3438e-04
Loss = 2.6305e-02, PNorm = 124.1884, GNorm = 0.3411, lr_0 = 6.3395e-04
Loss = 2.6010e-02, PNorm = 124.2416, GNorm = 0.2076, lr_0 = 6.3351e-04
Loss = 2.1731e-02, PNorm = 124.2870, GNorm = 0.1603, lr_0 = 6.3308e-04
Loss = 2.4934e-02, PNorm = 124.3303, GNorm = 0.3764, lr_0 = 6.3265e-04
Loss = 2.2686e-02, PNorm = 124.3771, GNorm = 0.4430, lr_0 = 6.3221e-04
Loss = 2.1519e-02, PNorm = 124.4165, GNorm = 0.3016, lr_0 = 6.3178e-04
Loss = 2.0251e-02, PNorm = 124.4572, GNorm = 0.3543, lr_0 = 6.3135e-04
Loss = 2.2870e-02, PNorm = 124.5066, GNorm = 0.3656, lr_0 = 6.3091e-04
Loss = 2.0121e-02, PNorm = 124.5546, GNorm = 0.2036, lr_0 = 6.3048e-04
Loss = 2.5460e-02, PNorm = 124.5935, GNorm = 0.4463, lr_0 = 6.3005e-04
Loss = 2.4705e-02, PNorm = 124.6372, GNorm = 0.5685, lr_0 = 6.2962e-04
Loss = 2.3603e-02, PNorm = 124.6855, GNorm = 0.2240, lr_0 = 6.2919e-04
Loss = 2.7756e-02, PNorm = 124.7265, GNorm = 0.5398, lr_0 = 6.2876e-04
Loss = 2.2960e-02, PNorm = 124.7708, GNorm = 0.4250, lr_0 = 6.2833e-04
Loss = 2.1128e-02, PNorm = 124.8170, GNorm = 0.3570, lr_0 = 6.2789e-04
Loss = 2.4522e-02, PNorm = 124.8668, GNorm = 0.1504, lr_0 = 6.2746e-04
Loss = 2.2622e-02, PNorm = 124.9120, GNorm = 0.4787, lr_0 = 6.2703e-04
Loss = 2.2639e-02, PNorm = 124.9576, GNorm = 0.2866, lr_0 = 6.2661e-04
Loss = 2.2110e-02, PNorm = 125.0081, GNorm = 0.6871, lr_0 = 6.2618e-04
Loss = 2.1630e-02, PNorm = 125.0564, GNorm = 0.2494, lr_0 = 6.2575e-04
Loss = 2.6326e-02, PNorm = 125.1055, GNorm = 0.1922, lr_0 = 6.2532e-04
Loss = 2.3761e-02, PNorm = 125.1610, GNorm = 0.3593, lr_0 = 6.2489e-04
Loss = 2.1954e-02, PNorm = 125.2114, GNorm = 0.2954, lr_0 = 6.2446e-04
Loss = 2.4473e-02, PNorm = 125.2601, GNorm = 0.8635, lr_0 = 6.2403e-04
Loss = 2.2997e-02, PNorm = 125.3082, GNorm = 0.5775, lr_0 = 6.2361e-04
Loss = 2.2889e-02, PNorm = 125.3560, GNorm = 0.2680, lr_0 = 6.2318e-04
Loss = 2.2303e-02, PNorm = 125.3959, GNorm = 0.4211, lr_0 = 6.2275e-04
Loss = 2.2000e-02, PNorm = 125.4464, GNorm = 0.2662, lr_0 = 6.2233e-04
Loss = 2.5518e-02, PNorm = 125.4989, GNorm = 0.1567, lr_0 = 6.2190e-04
Loss = 2.2441e-02, PNorm = 125.5458, GNorm = 0.4591, lr_0 = 6.2147e-04
Loss = 2.2329e-02, PNorm = 125.5982, GNorm = 0.3863, lr_0 = 6.2105e-04
Loss = 2.6420e-02, PNorm = 125.6448, GNorm = 0.4357, lr_0 = 6.2062e-04
Loss = 2.2228e-02, PNorm = 125.6954, GNorm = 0.3731, lr_0 = 6.2020e-04
Loss = 2.0710e-02, PNorm = 125.7544, GNorm = 0.2926, lr_0 = 6.1977e-04
Loss = 2.7454e-02, PNorm = 125.8017, GNorm = 0.7070, lr_0 = 6.1935e-04
Loss = 2.0454e-02, PNorm = 125.8516, GNorm = 0.6025, lr_0 = 6.1892e-04
Loss = 2.3204e-02, PNorm = 125.9046, GNorm = 0.2468, lr_0 = 6.1850e-04
Loss = 2.3617e-02, PNorm = 125.9561, GNorm = 0.3886, lr_0 = 6.1808e-04
Loss = 2.3835e-02, PNorm = 126.0044, GNorm = 0.2090, lr_0 = 6.1765e-04
Loss = 2.0918e-02, PNorm = 126.0550, GNorm = 0.5982, lr_0 = 6.1723e-04
Loss = 2.4467e-02, PNorm = 126.1063, GNorm = 0.2380, lr_0 = 6.1681e-04
Loss = 2.2195e-02, PNorm = 126.1571, GNorm = 0.5148, lr_0 = 6.1638e-04
Loss = 2.2694e-02, PNorm = 126.2139, GNorm = 0.1846, lr_0 = 6.1596e-04
Loss = 2.4017e-02, PNorm = 126.2643, GNorm = 0.2207, lr_0 = 6.1554e-04
Loss = 2.4482e-02, PNorm = 126.3140, GNorm = 0.3954, lr_0 = 6.1512e-04
Loss = 2.4103e-02, PNorm = 126.3618, GNorm = 0.6441, lr_0 = 6.1470e-04
Loss = 2.6783e-02, PNorm = 126.4146, GNorm = 0.5116, lr_0 = 6.1428e-04
Loss = 2.3530e-02, PNorm = 126.4627, GNorm = 0.6619, lr_0 = 6.1385e-04
Loss = 2.7968e-02, PNorm = 126.5152, GNorm = 0.4616, lr_0 = 6.1343e-04
Loss = 2.1452e-02, PNorm = 126.5652, GNorm = 0.2657, lr_0 = 6.1301e-04
Loss = 2.4735e-02, PNorm = 126.6151, GNorm = 0.4151, lr_0 = 6.1259e-04
Loss = 2.4217e-02, PNorm = 126.6636, GNorm = 0.3788, lr_0 = 6.1217e-04
Loss = 2.4316e-02, PNorm = 126.7202, GNorm = 0.1976, lr_0 = 6.1175e-04
Loss = 2.3579e-02, PNorm = 126.7670, GNorm = 0.6598, lr_0 = 6.1134e-04
Loss = 2.2220e-02, PNorm = 126.8144, GNorm = 0.2271, lr_0 = 6.1092e-04
Loss = 2.2846e-02, PNorm = 126.8701, GNorm = 0.2342, lr_0 = 6.1050e-04
Validation mae = 0.485781
Epoch 8
Loss = 1.9287e-02, PNorm = 126.9104, GNorm = 0.4216, lr_0 = 6.1008e-04
Loss = 2.2960e-02, PNorm = 126.9442, GNorm = 0.3744, lr_0 = 6.0966e-04
Loss = 1.7108e-02, PNorm = 126.9728, GNorm = 0.3409, lr_0 = 6.0924e-04
Loss = 2.0774e-02, PNorm = 127.0093, GNorm = 1.1521, lr_0 = 6.0883e-04
Loss = 2.0379e-02, PNorm = 127.0497, GNorm = 0.2438, lr_0 = 6.0841e-04
Loss = 1.9261e-02, PNorm = 127.0854, GNorm = 0.4834, lr_0 = 6.0799e-04
Loss = 1.6477e-02, PNorm = 127.1180, GNorm = 0.4905, lr_0 = 6.0758e-04
Loss = 1.9098e-02, PNorm = 127.1444, GNorm = 0.1994, lr_0 = 6.0716e-04
Loss = 1.8320e-02, PNorm = 127.1792, GNorm = 0.1824, lr_0 = 6.0674e-04
Loss = 2.0247e-02, PNorm = 127.2130, GNorm = 0.2998, lr_0 = 6.0633e-04
Loss = 1.5870e-02, PNorm = 127.2424, GNorm = 0.2231, lr_0 = 6.0591e-04
Loss = 1.7170e-02, PNorm = 127.2739, GNorm = 0.3262, lr_0 = 6.0550e-04
Loss = 1.6553e-02, PNorm = 127.3094, GNorm = 0.2437, lr_0 = 6.0508e-04
Loss = 1.9662e-02, PNorm = 127.3416, GNorm = 0.4876, lr_0 = 6.0467e-04
Loss = 1.5773e-02, PNorm = 127.3706, GNorm = 0.2847, lr_0 = 6.0425e-04
Loss = 1.6652e-02, PNorm = 127.3979, GNorm = 0.1745, lr_0 = 6.0384e-04
Loss = 1.8225e-02, PNorm = 127.4278, GNorm = 0.7376, lr_0 = 6.0343e-04
Loss = 1.8872e-02, PNorm = 127.4631, GNorm = 0.1651, lr_0 = 6.0301e-04
Loss = 1.8829e-02, PNorm = 127.5000, GNorm = 0.1597, lr_0 = 6.0260e-04
Loss = 1.8568e-02, PNorm = 127.5363, GNorm = 0.1900, lr_0 = 6.0219e-04
Loss = 1.7581e-02, PNorm = 127.5762, GNorm = 0.3613, lr_0 = 6.0178e-04
Loss = 1.5497e-02, PNorm = 127.6107, GNorm = 0.3612, lr_0 = 6.0136e-04
Loss = 1.9884e-02, PNorm = 127.6407, GNorm = 0.3200, lr_0 = 6.0095e-04
Loss = 1.8035e-02, PNorm = 127.6776, GNorm = 0.5953, lr_0 = 6.0054e-04
Loss = 1.8015e-02, PNorm = 127.7086, GNorm = 0.3703, lr_0 = 6.0013e-04
Loss = 1.9710e-02, PNorm = 127.7427, GNorm = 0.3175, lr_0 = 5.9972e-04
Loss = 1.8067e-02, PNorm = 127.7807, GNorm = 0.2795, lr_0 = 5.9931e-04
Loss = 2.0406e-02, PNorm = 127.8154, GNorm = 0.5384, lr_0 = 5.9890e-04
Loss = 1.9954e-02, PNorm = 127.8548, GNorm = 0.3574, lr_0 = 5.9849e-04
Loss = 1.8245e-02, PNorm = 127.8953, GNorm = 0.2128, lr_0 = 5.9808e-04
Loss = 1.8672e-02, PNorm = 127.9343, GNorm = 0.3035, lr_0 = 5.9767e-04
Loss = 1.8264e-02, PNorm = 127.9722, GNorm = 0.2145, lr_0 = 5.9726e-04
Loss = 1.7712e-02, PNorm = 128.0041, GNorm = 0.4818, lr_0 = 5.9685e-04
Loss = 1.5583e-02, PNorm = 128.0388, GNorm = 0.4289, lr_0 = 5.9644e-04
Loss = 1.5364e-02, PNorm = 128.0722, GNorm = 0.3301, lr_0 = 5.9603e-04
Loss = 1.9198e-02, PNorm = 128.1030, GNorm = 0.2278, lr_0 = 5.9562e-04
Loss = 1.7269e-02, PNorm = 128.1316, GNorm = 0.3070, lr_0 = 5.9521e-04
Loss = 1.8855e-02, PNorm = 128.1651, GNorm = 0.2207, lr_0 = 5.9481e-04
Loss = 1.8206e-02, PNorm = 128.1965, GNorm = 0.4299, lr_0 = 5.9440e-04
Loss = 1.5275e-02, PNorm = 128.2268, GNorm = 0.5106, lr_0 = 5.9399e-04
Loss = 1.6834e-02, PNorm = 128.2587, GNorm = 0.2986, lr_0 = 5.9358e-04
Loss = 2.0049e-02, PNorm = 128.2928, GNorm = 0.6173, lr_0 = 5.9318e-04
Loss = 1.9353e-02, PNorm = 128.3353, GNorm = 0.4528, lr_0 = 5.9277e-04
Loss = 1.8265e-02, PNorm = 128.3743, GNorm = 0.1291, lr_0 = 5.9236e-04
Loss = 1.9541e-02, PNorm = 128.4098, GNorm = 0.3659, lr_0 = 5.9196e-04
Loss = 1.9337e-02, PNorm = 128.4443, GNorm = 0.2941, lr_0 = 5.9155e-04
Loss = 1.8075e-02, PNorm = 128.4907, GNorm = 0.2588, lr_0 = 5.9115e-04
Loss = 1.6784e-02, PNorm = 128.5340, GNorm = 0.2176, lr_0 = 5.9074e-04
Loss = 1.7529e-02, PNorm = 128.5699, GNorm = 0.1910, lr_0 = 5.9034e-04
Loss = 1.6338e-02, PNorm = 128.6083, GNorm = 0.2235, lr_0 = 5.8993e-04
Loss = 1.7740e-02, PNorm = 128.6463, GNorm = 0.1576, lr_0 = 5.8953e-04
Loss = 1.8664e-02, PNorm = 128.6911, GNorm = 0.1777, lr_0 = 5.8913e-04
Loss = 1.8225e-02, PNorm = 128.7308, GNorm = 0.1984, lr_0 = 5.8872e-04
Loss = 1.6970e-02, PNorm = 128.7620, GNorm = 0.4911, lr_0 = 5.8832e-04
Loss = 1.5707e-02, PNorm = 128.8006, GNorm = 0.1414, lr_0 = 5.8792e-04
Loss = 1.8877e-02, PNorm = 128.8372, GNorm = 0.3262, lr_0 = 5.8751e-04
Loss = 1.6763e-02, PNorm = 128.8734, GNorm = 0.2394, lr_0 = 5.8711e-04
Loss = 1.8014e-02, PNorm = 128.9110, GNorm = 0.1944, lr_0 = 5.8671e-04
Loss = 1.5359e-02, PNorm = 128.9472, GNorm = 0.3506, lr_0 = 5.8631e-04
Loss = 1.5783e-02, PNorm = 128.9875, GNorm = 0.2517, lr_0 = 5.8591e-04
Loss = 1.8986e-02, PNorm = 129.0234, GNorm = 0.2873, lr_0 = 5.8550e-04
Loss = 1.8690e-02, PNorm = 129.0601, GNorm = 0.2568, lr_0 = 5.8510e-04
Loss = 1.8885e-02, PNorm = 129.0963, GNorm = 0.4173, lr_0 = 5.8470e-04
Loss = 1.8648e-02, PNorm = 129.1332, GNorm = 0.3608, lr_0 = 5.8430e-04
Loss = 1.9240e-02, PNorm = 129.1780, GNorm = 0.1654, lr_0 = 5.8390e-04
Loss = 1.8755e-02, PNorm = 129.2217, GNorm = 0.4896, lr_0 = 5.8350e-04
Loss = 1.7389e-02, PNorm = 129.2624, GNorm = 0.1652, lr_0 = 5.8310e-04
Loss = 1.8226e-02, PNorm = 129.3014, GNorm = 0.2109, lr_0 = 5.8270e-04
Loss = 1.7547e-02, PNorm = 129.3340, GNorm = 0.3980, lr_0 = 5.8230e-04
Loss = 1.9435e-02, PNorm = 129.3734, GNorm = 0.4489, lr_0 = 5.8190e-04
Loss = 1.8613e-02, PNorm = 129.4083, GNorm = 0.3136, lr_0 = 5.8151e-04
Loss = 1.9151e-02, PNorm = 129.4483, GNorm = 0.2081, lr_0 = 5.8111e-04
Loss = 2.2205e-02, PNorm = 129.4899, GNorm = 0.4366, lr_0 = 5.8071e-04
Loss = 1.5789e-02, PNorm = 129.5305, GNorm = 0.2919, lr_0 = 5.8031e-04
Loss = 1.4947e-02, PNorm = 129.5733, GNorm = 0.2026, lr_0 = 5.7991e-04
Loss = 1.6495e-02, PNorm = 129.6114, GNorm = 0.4457, lr_0 = 5.7952e-04
Loss = 1.7704e-02, PNorm = 129.6502, GNorm = 0.2008, lr_0 = 5.7912e-04
Loss = 1.8441e-02, PNorm = 129.6875, GNorm = 0.3678, lr_0 = 5.7872e-04
Loss = 2.0796e-02, PNorm = 129.7210, GNorm = 0.3830, lr_0 = 5.7833e-04
Loss = 1.7621e-02, PNorm = 129.7614, GNorm = 0.3088, lr_0 = 5.7793e-04
Loss = 2.0798e-02, PNorm = 129.8035, GNorm = 0.7101, lr_0 = 5.7753e-04
Loss = 1.9100e-02, PNorm = 129.8524, GNorm = 0.3342, lr_0 = 5.7714e-04
Loss = 2.0136e-02, PNorm = 129.9008, GNorm = 0.3004, lr_0 = 5.7674e-04
Loss = 1.8895e-02, PNorm = 129.9410, GNorm = 0.2589, lr_0 = 5.7635e-04
Loss = 2.2413e-02, PNorm = 129.9814, GNorm = 0.3486, lr_0 = 5.7595e-04
Loss = 1.6717e-02, PNorm = 130.0267, GNorm = 0.3778, lr_0 = 5.7556e-04
Loss = 1.9692e-02, PNorm = 130.0719, GNorm = 0.2702, lr_0 = 5.7516e-04
Loss = 1.8368e-02, PNorm = 130.1114, GNorm = 0.1825, lr_0 = 5.7477e-04
Loss = 2.3553e-02, PNorm = 130.1504, GNorm = 0.2275, lr_0 = 5.7438e-04
Loss = 2.0583e-02, PNorm = 130.1933, GNorm = 0.9962, lr_0 = 5.7398e-04
Loss = 1.7723e-02, PNorm = 130.2367, GNorm = 0.3811, lr_0 = 5.7359e-04
Loss = 2.1640e-02, PNorm = 130.2782, GNorm = 0.1788, lr_0 = 5.7320e-04
Loss = 1.5284e-02, PNorm = 130.3155, GNorm = 0.2251, lr_0 = 5.7280e-04
Loss = 1.9706e-02, PNorm = 130.3554, GNorm = 0.2579, lr_0 = 5.7241e-04
Loss = 1.8035e-02, PNorm = 130.4000, GNorm = 0.2844, lr_0 = 5.7202e-04
Loss = 1.6924e-02, PNorm = 130.4421, GNorm = 0.6284, lr_0 = 5.7163e-04
Loss = 1.6881e-02, PNorm = 130.4769, GNorm = 0.7111, lr_0 = 5.7124e-04
Loss = 2.1660e-02, PNorm = 130.5154, GNorm = 0.4552, lr_0 = 5.7084e-04
Loss = 2.3751e-02, PNorm = 130.5594, GNorm = 0.3213, lr_0 = 5.7045e-04
Loss = 1.6301e-02, PNorm = 130.6065, GNorm = 0.5643, lr_0 = 5.7006e-04
Loss = 1.9626e-02, PNorm = 130.6477, GNorm = 0.4653, lr_0 = 5.6967e-04
Loss = 1.7424e-02, PNorm = 130.6920, GNorm = 0.2309, lr_0 = 5.6928e-04
Loss = 2.0210e-02, PNorm = 130.7351, GNorm = 0.2025, lr_0 = 5.6889e-04
Loss = 2.0355e-02, PNorm = 130.7834, GNorm = 0.1967, lr_0 = 5.6850e-04
Loss = 2.1517e-02, PNorm = 130.8231, GNorm = 0.8856, lr_0 = 5.6811e-04
Loss = 1.8975e-02, PNorm = 130.8639, GNorm = 0.4453, lr_0 = 5.6772e-04
Loss = 1.9418e-02, PNorm = 130.9024, GNorm = 0.2107, lr_0 = 5.6733e-04
Loss = 1.9547e-02, PNorm = 130.9459, GNorm = 0.5570, lr_0 = 5.6695e-04
Loss = 2.0188e-02, PNorm = 130.9925, GNorm = 0.6422, lr_0 = 5.6656e-04
Loss = 2.5137e-02, PNorm = 131.0356, GNorm = 0.1955, lr_0 = 5.6617e-04
Loss = 2.5282e-02, PNorm = 131.0850, GNorm = 0.4823, lr_0 = 5.6578e-04
Loss = 1.9201e-02, PNorm = 131.1342, GNorm = 0.2244, lr_0 = 5.6539e-04
Loss = 1.7002e-02, PNorm = 131.1768, GNorm = 0.3831, lr_0 = 5.6501e-04
Loss = 1.8150e-02, PNorm = 131.2194, GNorm = 0.1590, lr_0 = 5.6462e-04
Loss = 2.1438e-02, PNorm = 131.2601, GNorm = 0.5326, lr_0 = 5.6423e-04
Loss = 1.9299e-02, PNorm = 131.3013, GNorm = 0.5087, lr_0 = 5.6385e-04
Loss = 1.9725e-02, PNorm = 131.3407, GNorm = 0.1747, lr_0 = 5.6346e-04
Loss = 1.9837e-02, PNorm = 131.3828, GNorm = 0.2846, lr_0 = 5.6307e-04
Loss = 1.9828e-02, PNorm = 131.4214, GNorm = 0.3444, lr_0 = 5.6269e-04
Loss = 2.1668e-02, PNorm = 131.4601, GNorm = 0.2714, lr_0 = 5.6230e-04
Validation mae = 0.484107
Epoch 9
Loss = 1.4478e-02, PNorm = 131.4950, GNorm = 0.5008, lr_0 = 5.6192e-04
Loss = 1.7438e-02, PNorm = 131.5273, GNorm = 0.4763, lr_0 = 5.6153e-04
Loss = 1.3700e-02, PNorm = 131.5522, GNorm = 0.3939, lr_0 = 5.6115e-04
Loss = 1.5484e-02, PNorm = 131.5692, GNorm = 0.2227, lr_0 = 5.6076e-04
Loss = 1.6391e-02, PNorm = 131.5916, GNorm = 0.2154, lr_0 = 5.6038e-04
Loss = 1.3519e-02, PNorm = 131.6230, GNorm = 0.3517, lr_0 = 5.6000e-04
Loss = 1.4920e-02, PNorm = 131.6511, GNorm = 0.1950, lr_0 = 5.5961e-04
Loss = 1.4073e-02, PNorm = 131.6772, GNorm = 0.4231, lr_0 = 5.5923e-04
Loss = 1.7616e-02, PNorm = 131.7014, GNorm = 0.4945, lr_0 = 5.5885e-04
Loss = 1.6690e-02, PNorm = 131.7343, GNorm = 0.2101, lr_0 = 5.5846e-04
Loss = 1.5503e-02, PNorm = 131.7631, GNorm = 0.3143, lr_0 = 5.5808e-04
Loss = 1.4844e-02, PNorm = 131.7950, GNorm = 0.1917, lr_0 = 5.5770e-04
Loss = 1.8326e-02, PNorm = 131.8243, GNorm = 0.2432, lr_0 = 5.5732e-04
Loss = 1.4469e-02, PNorm = 131.8569, GNorm = 0.3073, lr_0 = 5.5693e-04
Loss = 1.3015e-02, PNorm = 131.8798, GNorm = 0.2288, lr_0 = 5.5655e-04
Loss = 1.7433e-02, PNorm = 131.9092, GNorm = 0.2301, lr_0 = 5.5617e-04
Loss = 1.2626e-02, PNorm = 131.9454, GNorm = 0.4701, lr_0 = 5.5579e-04
Loss = 1.5220e-02, PNorm = 131.9705, GNorm = 0.4023, lr_0 = 5.5541e-04
Loss = 1.4420e-02, PNorm = 132.0014, GNorm = 0.1579, lr_0 = 5.5503e-04
Loss = 1.4730e-02, PNorm = 132.0316, GNorm = 0.3075, lr_0 = 5.5465e-04
Loss = 1.3325e-02, PNorm = 132.0630, GNorm = 0.1509, lr_0 = 5.5427e-04
Loss = 1.3532e-02, PNorm = 132.0900, GNorm = 0.3367, lr_0 = 5.5389e-04
Loss = 1.5587e-02, PNorm = 132.1219, GNorm = 0.4229, lr_0 = 5.5351e-04
Loss = 1.3340e-02, PNorm = 132.1548, GNorm = 0.1778, lr_0 = 5.5313e-04
Loss = 1.3440e-02, PNorm = 132.1818, GNorm = 0.3540, lr_0 = 5.5275e-04
Loss = 1.8002e-02, PNorm = 132.2053, GNorm = 0.1373, lr_0 = 5.5237e-04
Loss = 1.4455e-02, PNorm = 132.2324, GNorm = 0.2017, lr_0 = 5.5199e-04
Loss = 1.6752e-02, PNorm = 132.2600, GNorm = 0.3986, lr_0 = 5.5162e-04
Loss = 1.3681e-02, PNorm = 132.2913, GNorm = 0.1269, lr_0 = 5.5124e-04
Loss = 1.3515e-02, PNorm = 132.3214, GNorm = 0.4400, lr_0 = 5.5086e-04
Loss = 1.5883e-02, PNorm = 132.3546, GNorm = 0.2489, lr_0 = 5.5048e-04
Loss = 1.3964e-02, PNorm = 132.3878, GNorm = 0.3803, lr_0 = 5.5011e-04
Loss = 1.6585e-02, PNorm = 132.4173, GNorm = 0.1953, lr_0 = 5.4973e-04
Loss = 1.3619e-02, PNorm = 132.4448, GNorm = 0.2882, lr_0 = 5.4935e-04
Loss = 1.6187e-02, PNorm = 132.4741, GNorm = 0.3143, lr_0 = 5.4898e-04
Loss = 1.2724e-02, PNorm = 132.5060, GNorm = 0.2070, lr_0 = 5.4860e-04
Loss = 1.5984e-02, PNorm = 132.5314, GNorm = 0.1871, lr_0 = 5.4822e-04
Loss = 1.4221e-02, PNorm = 132.5611, GNorm = 0.3737, lr_0 = 5.4785e-04
Loss = 1.6114e-02, PNorm = 132.5815, GNorm = 0.1899, lr_0 = 5.4747e-04
Loss = 1.4907e-02, PNorm = 132.6092, GNorm = 0.8789, lr_0 = 5.4710e-04
Loss = 1.7531e-02, PNorm = 132.6438, GNorm = 0.2971, lr_0 = 5.4672e-04
Loss = 1.6476e-02, PNorm = 132.6775, GNorm = 0.2784, lr_0 = 5.4635e-04
Loss = 1.5992e-02, PNorm = 132.7074, GNorm = 0.3330, lr_0 = 5.4597e-04
Loss = 1.2741e-02, PNorm = 132.7403, GNorm = 0.4183, lr_0 = 5.4560e-04
Loss = 1.5286e-02, PNorm = 132.7671, GNorm = 0.4263, lr_0 = 5.4523e-04
Loss = 1.5420e-02, PNorm = 132.8009, GNorm = 0.2751, lr_0 = 5.4485e-04
Loss = 1.3619e-02, PNorm = 132.8273, GNorm = 0.2410, lr_0 = 5.4448e-04
Loss = 1.2439e-02, PNorm = 132.8597, GNorm = 0.3477, lr_0 = 5.4411e-04
Loss = 1.5609e-02, PNorm = 132.8839, GNorm = 0.3917, lr_0 = 5.4373e-04
Loss = 1.4131e-02, PNorm = 132.9129, GNorm = 0.1385, lr_0 = 5.4336e-04
Loss = 1.9048e-02, PNorm = 132.9479, GNorm = 0.2514, lr_0 = 5.4299e-04
Loss = 1.6323e-02, PNorm = 132.9830, GNorm = 0.1140, lr_0 = 5.4262e-04
Loss = 1.3941e-02, PNorm = 133.0207, GNorm = 0.1861, lr_0 = 5.4225e-04
Loss = 1.3048e-02, PNorm = 133.0484, GNorm = 0.2079, lr_0 = 5.4187e-04
Loss = 1.4843e-02, PNorm = 133.0784, GNorm = 0.2625, lr_0 = 5.4150e-04
Loss = 1.8711e-02, PNorm = 133.1138, GNorm = 0.1816, lr_0 = 5.4113e-04
Loss = 1.4172e-02, PNorm = 133.1470, GNorm = 0.3868, lr_0 = 5.4076e-04
Loss = 1.3582e-02, PNorm = 133.1809, GNorm = 0.4584, lr_0 = 5.4039e-04
Loss = 1.4937e-02, PNorm = 133.2121, GNorm = 0.3015, lr_0 = 5.4002e-04
Loss = 1.5661e-02, PNorm = 133.2472, GNorm = 0.2610, lr_0 = 5.3965e-04
Loss = 1.2208e-02, PNorm = 133.2822, GNorm = 0.4971, lr_0 = 5.3928e-04
Loss = 1.3409e-02, PNorm = 133.3150, GNorm = 0.3128, lr_0 = 5.3891e-04
Loss = 1.5458e-02, PNorm = 133.3479, GNorm = 0.4016, lr_0 = 5.3854e-04
Loss = 1.4775e-02, PNorm = 133.3843, GNorm = 0.1999, lr_0 = 5.3817e-04
Loss = 1.4024e-02, PNorm = 133.4172, GNorm = 0.5535, lr_0 = 5.3781e-04
Loss = 1.3913e-02, PNorm = 133.4467, GNorm = 0.2794, lr_0 = 5.3744e-04
Loss = 1.6413e-02, PNorm = 133.4779, GNorm = 1.0163, lr_0 = 5.3707e-04
Loss = 1.7417e-02, PNorm = 133.5074, GNorm = 0.5168, lr_0 = 5.3670e-04
Loss = 1.5822e-02, PNorm = 133.5368, GNorm = 0.6711, lr_0 = 5.3633e-04
Loss = 1.6142e-02, PNorm = 133.5744, GNorm = 0.2365, lr_0 = 5.3597e-04
Loss = 1.5759e-02, PNorm = 133.6125, GNorm = 0.2536, lr_0 = 5.3560e-04
Loss = 1.4126e-02, PNorm = 133.6498, GNorm = 0.2809, lr_0 = 5.3523e-04
Loss = 1.3963e-02, PNorm = 133.6842, GNorm = 0.4749, lr_0 = 5.3486e-04
Loss = 1.5337e-02, PNorm = 133.7169, GNorm = 0.2293, lr_0 = 5.3450e-04
Loss = 1.6034e-02, PNorm = 133.7513, GNorm = 0.6164, lr_0 = 5.3413e-04
Loss = 1.4598e-02, PNorm = 133.7838, GNorm = 0.4672, lr_0 = 5.3377e-04
Loss = 1.5861e-02, PNorm = 133.8200, GNorm = 0.1671, lr_0 = 5.3340e-04
Loss = 1.7906e-02, PNorm = 133.8512, GNorm = 0.5756, lr_0 = 5.3304e-04
Loss = 1.5011e-02, PNorm = 133.8886, GNorm = 0.4697, lr_0 = 5.3267e-04
Loss = 1.4005e-02, PNorm = 133.9205, GNorm = 0.3363, lr_0 = 5.3231e-04
Loss = 1.5793e-02, PNorm = 133.9541, GNorm = 0.1326, lr_0 = 5.3194e-04
Loss = 1.5473e-02, PNorm = 133.9880, GNorm = 0.2205, lr_0 = 5.3158e-04
Loss = 1.3923e-02, PNorm = 134.0207, GNorm = 0.1531, lr_0 = 5.3121e-04
Loss = 1.8228e-02, PNorm = 134.0535, GNorm = 0.2282, lr_0 = 5.3085e-04
Loss = 1.3315e-02, PNorm = 134.0891, GNorm = 0.1352, lr_0 = 5.3048e-04
Loss = 1.5685e-02, PNorm = 134.1270, GNorm = 0.3034, lr_0 = 5.3012e-04
Loss = 1.5568e-02, PNorm = 134.1668, GNorm = 0.3156, lr_0 = 5.2976e-04
Loss = 1.4956e-02, PNorm = 134.1998, GNorm = 0.1737, lr_0 = 5.2939e-04
Loss = 1.5625e-02, PNorm = 134.2400, GNorm = 0.1281, lr_0 = 5.2903e-04
Loss = 1.6631e-02, PNorm = 134.2789, GNorm = 0.6489, lr_0 = 5.2867e-04
Loss = 1.6104e-02, PNorm = 134.3173, GNorm = 0.3189, lr_0 = 5.2831e-04
Loss = 1.4153e-02, PNorm = 134.3513, GNorm = 0.2986, lr_0 = 5.2795e-04
Loss = 1.5824e-02, PNorm = 134.3871, GNorm = 0.2710, lr_0 = 5.2758e-04
Loss = 1.2689e-02, PNorm = 134.4194, GNorm = 0.1537, lr_0 = 5.2722e-04
Loss = 1.5950e-02, PNorm = 134.4564, GNorm = 0.2871, lr_0 = 5.2686e-04
Loss = 1.5437e-02, PNorm = 134.4933, GNorm = 0.3608, lr_0 = 5.2650e-04
Loss = 1.5921e-02, PNorm = 134.5263, GNorm = 0.3912, lr_0 = 5.2614e-04
Loss = 1.7546e-02, PNorm = 134.5650, GNorm = 0.2177, lr_0 = 5.2578e-04
Loss = 1.7230e-02, PNorm = 134.6005, GNorm = 0.3890, lr_0 = 5.2542e-04
Loss = 1.2081e-02, PNorm = 134.6375, GNorm = 0.3916, lr_0 = 5.2506e-04
Loss = 1.7162e-02, PNorm = 134.6678, GNorm = 0.4580, lr_0 = 5.2470e-04
Loss = 1.7617e-02, PNorm = 134.7003, GNorm = 0.4730, lr_0 = 5.2434e-04
Loss = 1.3713e-02, PNorm = 134.7382, GNorm = 0.3844, lr_0 = 5.2398e-04
Loss = 1.3878e-02, PNorm = 134.7744, GNorm = 0.1286, lr_0 = 5.2362e-04
Loss = 1.3792e-02, PNorm = 134.8081, GNorm = 0.4077, lr_0 = 5.2326e-04
Loss = 1.4265e-02, PNorm = 134.8375, GNorm = 0.1508, lr_0 = 5.2290e-04
Loss = 1.5990e-02, PNorm = 134.8683, GNorm = 0.4185, lr_0 = 5.2255e-04
Loss = 1.7378e-02, PNorm = 134.8964, GNorm = 0.3750, lr_0 = 5.2219e-04
Loss = 1.5275e-02, PNorm = 134.9325, GNorm = 0.2776, lr_0 = 5.2183e-04
Loss = 1.5125e-02, PNorm = 134.9699, GNorm = 0.3117, lr_0 = 5.2147e-04
Loss = 1.6746e-02, PNorm = 135.0035, GNorm = 0.7986, lr_0 = 5.2112e-04
Loss = 1.6439e-02, PNorm = 135.0353, GNorm = 0.1745, lr_0 = 5.2076e-04
Loss = 1.3157e-02, PNorm = 135.0700, GNorm = 0.2609, lr_0 = 5.2040e-04
Loss = 1.6089e-02, PNorm = 135.1056, GNorm = 0.7467, lr_0 = 5.2005e-04
Loss = 1.7681e-02, PNorm = 135.1459, GNorm = 0.4960, lr_0 = 5.1969e-04
Loss = 1.6110e-02, PNorm = 135.1886, GNorm = 0.3219, lr_0 = 5.1933e-04
Loss = 1.4983e-02, PNorm = 135.2260, GNorm = 0.1570, lr_0 = 5.1898e-04
Loss = 1.3985e-02, PNorm = 135.2660, GNorm = 0.4277, lr_0 = 5.1862e-04
Loss = 1.5522e-02, PNorm = 135.3059, GNorm = 0.4951, lr_0 = 5.1827e-04
Loss = 1.5094e-02, PNorm = 135.3479, GNorm = 0.3770, lr_0 = 5.1791e-04
Validation mae = 0.481091
Epoch 10
Loss = 1.4732e-02, PNorm = 135.3824, GNorm = 0.3526, lr_0 = 5.1756e-04
Loss = 1.3781e-02, PNorm = 135.4102, GNorm = 0.1645, lr_0 = 5.1720e-04
Loss = 1.2887e-02, PNorm = 135.4300, GNorm = 0.5626, lr_0 = 5.1685e-04
Loss = 1.2649e-02, PNorm = 135.4551, GNorm = 0.3412, lr_0 = 5.1649e-04
Loss = 1.1148e-02, PNorm = 135.4743, GNorm = 0.3074, lr_0 = 5.1614e-04
Loss = 1.2618e-02, PNorm = 135.4991, GNorm = 0.1549, lr_0 = 5.1579e-04
Loss = 1.4025e-02, PNorm = 135.5300, GNorm = 0.1455, lr_0 = 5.1543e-04
Loss = 1.3851e-02, PNorm = 135.5571, GNorm = 0.1568, lr_0 = 5.1508e-04
Loss = 1.3507e-02, PNorm = 135.5826, GNorm = 0.1762, lr_0 = 5.1473e-04
Loss = 1.1130e-02, PNorm = 135.6065, GNorm = 0.4575, lr_0 = 5.1437e-04
Loss = 1.2454e-02, PNorm = 135.6326, GNorm = 0.2132, lr_0 = 5.1402e-04
Loss = 1.4909e-02, PNorm = 135.6569, GNorm = 0.1805, lr_0 = 5.1367e-04
Loss = 1.2389e-02, PNorm = 135.6818, GNorm = 0.3115, lr_0 = 5.1332e-04
Loss = 1.3501e-02, PNorm = 135.7022, GNorm = 0.2839, lr_0 = 5.1297e-04
Loss = 1.6680e-02, PNorm = 135.7218, GNorm = 0.4124, lr_0 = 5.1262e-04
Loss = 1.6507e-02, PNorm = 135.7521, GNorm = 0.1411, lr_0 = 5.1226e-04
Loss = 1.2786e-02, PNorm = 135.7731, GNorm = 0.4067, lr_0 = 5.1191e-04
Loss = 1.9337e-02, PNorm = 135.8013, GNorm = 0.2620, lr_0 = 5.1156e-04
Loss = 1.4033e-02, PNorm = 135.8264, GNorm = 0.3819, lr_0 = 5.1121e-04
Loss = 1.2473e-02, PNorm = 135.8532, GNorm = 0.2627, lr_0 = 5.1086e-04
Loss = 1.3555e-02, PNorm = 135.8839, GNorm = 0.1674, lr_0 = 5.1051e-04
Loss = 1.3120e-02, PNorm = 135.9110, GNorm = 0.1397, lr_0 = 5.1016e-04
Loss = 9.6331e-03, PNorm = 135.9392, GNorm = 0.2657, lr_0 = 5.0981e-04
Loss = 1.4390e-02, PNorm = 135.9626, GNorm = 0.4323, lr_0 = 5.0946e-04
Loss = 1.1856e-02, PNorm = 135.9845, GNorm = 0.3274, lr_0 = 5.0911e-04
Loss = 1.2282e-02, PNorm = 136.0127, GNorm = 0.2266, lr_0 = 5.0877e-04
Loss = 1.0094e-02, PNorm = 136.0415, GNorm = 0.2356, lr_0 = 5.0842e-04
Loss = 1.2511e-02, PNorm = 136.0656, GNorm = 0.2049, lr_0 = 5.0807e-04
Loss = 1.1799e-02, PNorm = 136.0922, GNorm = 0.4828, lr_0 = 5.0772e-04
Loss = 1.3942e-02, PNorm = 136.1202, GNorm = 0.2265, lr_0 = 5.0737e-04
Loss = 1.0096e-02, PNorm = 136.1440, GNorm = 0.1395, lr_0 = 5.0703e-04
Loss = 1.2146e-02, PNorm = 136.1672, GNorm = 0.1447, lr_0 = 5.0668e-04
Loss = 1.1419e-02, PNorm = 136.1902, GNorm = 0.2220, lr_0 = 5.0633e-04
Loss = 1.1821e-02, PNorm = 136.2153, GNorm = 0.2106, lr_0 = 5.0598e-04
Loss = 1.3231e-02, PNorm = 136.2396, GNorm = 0.3839, lr_0 = 5.0564e-04
Loss = 1.1724e-02, PNorm = 136.2647, GNorm = 0.2914, lr_0 = 5.0529e-04
Loss = 1.4424e-02, PNorm = 136.2940, GNorm = 0.1510, lr_0 = 5.0494e-04
Loss = 1.2988e-02, PNorm = 136.3249, GNorm = 0.3726, lr_0 = 5.0460e-04
Loss = 1.0096e-02, PNorm = 136.3514, GNorm = 0.1451, lr_0 = 5.0425e-04
Loss = 1.3801e-02, PNorm = 136.3766, GNorm = 0.5363, lr_0 = 5.0391e-04
Loss = 1.3134e-02, PNorm = 136.4028, GNorm = 0.5668, lr_0 = 5.0356e-04
Loss = 1.3443e-02, PNorm = 136.4328, GNorm = 0.1879, lr_0 = 5.0322e-04
Loss = 1.0822e-02, PNorm = 136.4567, GNorm = 0.1230, lr_0 = 5.0287e-04
Loss = 1.2097e-02, PNorm = 136.4783, GNorm = 0.4597, lr_0 = 5.0253e-04
Loss = 1.0650e-02, PNorm = 136.5021, GNorm = 0.3454, lr_0 = 5.0218e-04
Loss = 1.2786e-02, PNorm = 136.5255, GNorm = 0.4007, lr_0 = 5.0184e-04
Loss = 9.7549e-03, PNorm = 136.5521, GNorm = 0.2862, lr_0 = 5.0150e-04
Loss = 1.2050e-02, PNorm = 136.5799, GNorm = 0.3500, lr_0 = 5.0115e-04
Loss = 1.1972e-02, PNorm = 136.6092, GNorm = 0.2669, lr_0 = 5.0081e-04
Loss = 1.0623e-02, PNorm = 136.6320, GNorm = 0.3354, lr_0 = 5.0047e-04
Loss = 1.2621e-02, PNorm = 136.6529, GNorm = 0.2040, lr_0 = 5.0012e-04
Loss = 1.1800e-02, PNorm = 136.6783, GNorm = 0.4672, lr_0 = 4.9978e-04
Loss = 1.0858e-02, PNorm = 136.7014, GNorm = 0.4515, lr_0 = 4.9944e-04
Loss = 1.2480e-02, PNorm = 136.7292, GNorm = 0.3114, lr_0 = 4.9910e-04
Loss = 1.0351e-02, PNorm = 136.7588, GNorm = 0.1193, lr_0 = 4.9875e-04
Loss = 1.2984e-02, PNorm = 136.7829, GNorm = 0.3983, lr_0 = 4.9841e-04
Loss = 1.4178e-02, PNorm = 136.8119, GNorm = 0.1322, lr_0 = 4.9807e-04
Loss = 1.1786e-02, PNorm = 136.8360, GNorm = 0.3120, lr_0 = 4.9773e-04
Loss = 1.2257e-02, PNorm = 136.8582, GNorm = 0.2467, lr_0 = 4.9739e-04
Loss = 1.2905e-02, PNorm = 136.8873, GNorm = 0.1258, lr_0 = 4.9705e-04
Loss = 1.1191e-02, PNorm = 136.9120, GNorm = 0.2601, lr_0 = 4.9671e-04
Loss = 1.0329e-02, PNorm = 136.9425, GNorm = 0.1572, lr_0 = 4.9637e-04
Loss = 1.2561e-02, PNorm = 136.9717, GNorm = 0.3452, lr_0 = 4.9603e-04
Loss = 1.2680e-02, PNorm = 137.0014, GNorm = 0.3518, lr_0 = 4.9569e-04
Loss = 1.3281e-02, PNorm = 137.0298, GNorm = 0.4391, lr_0 = 4.9535e-04
Loss = 1.3687e-02, PNorm = 137.0589, GNorm = 0.2049, lr_0 = 4.9501e-04
Loss = 1.1968e-02, PNorm = 137.0887, GNorm = 0.3094, lr_0 = 4.9467e-04
Loss = 1.1252e-02, PNorm = 137.1157, GNorm = 0.1712, lr_0 = 4.9433e-04
Loss = 1.2407e-02, PNorm = 137.1455, GNorm = 0.4167, lr_0 = 4.9399e-04
Loss = 1.3961e-02, PNorm = 137.1724, GNorm = 0.5493, lr_0 = 4.9365e-04
Loss = 1.1939e-02, PNorm = 137.1988, GNorm = 0.2291, lr_0 = 4.9332e-04
Loss = 1.0370e-02, PNorm = 137.2237, GNorm = 0.1918, lr_0 = 4.9298e-04
Loss = 1.1282e-02, PNorm = 137.2493, GNorm = 0.1900, lr_0 = 4.9264e-04
Loss = 1.0475e-02, PNorm = 137.2734, GNorm = 0.1646, lr_0 = 4.9230e-04
Loss = 1.0756e-02, PNorm = 137.2999, GNorm = 0.5700, lr_0 = 4.9197e-04
Loss = 1.0686e-02, PNorm = 137.3284, GNorm = 0.1914, lr_0 = 4.9163e-04
Loss = 1.4536e-02, PNorm = 137.3573, GNorm = 0.1694, lr_0 = 4.9129e-04
Loss = 1.1360e-02, PNorm = 137.3828, GNorm = 0.1302, lr_0 = 4.9095e-04
Loss = 1.1538e-02, PNorm = 137.4076, GNorm = 0.3635, lr_0 = 4.9062e-04
Loss = 1.6905e-02, PNorm = 137.4324, GNorm = 0.6487, lr_0 = 4.9028e-04
Loss = 1.2173e-02, PNorm = 137.4631, GNorm = 0.4359, lr_0 = 4.8995e-04
Loss = 1.0303e-02, PNorm = 137.4869, GNorm = 0.2679, lr_0 = 4.8961e-04
Loss = 1.3085e-02, PNorm = 137.5135, GNorm = 0.2109, lr_0 = 4.8928e-04
Loss = 1.2559e-02, PNorm = 137.5393, GNorm = 0.1367, lr_0 = 4.8894e-04
Loss = 1.2334e-02, PNorm = 137.5690, GNorm = 0.2122, lr_0 = 4.8861e-04
Loss = 1.3211e-02, PNorm = 137.5989, GNorm = 0.1798, lr_0 = 4.8827e-04
Loss = 1.0240e-02, PNorm = 137.6298, GNorm = 0.3244, lr_0 = 4.8794e-04
Loss = 1.1415e-02, PNorm = 137.6621, GNorm = 0.4281, lr_0 = 4.8760e-04
Loss = 1.2617e-02, PNorm = 137.6933, GNorm = 0.6724, lr_0 = 4.8727e-04
Loss = 1.0864e-02, PNorm = 137.7220, GNorm = 0.5370, lr_0 = 4.8693e-04
Loss = 1.2017e-02, PNorm = 137.7497, GNorm = 0.3041, lr_0 = 4.8660e-04
Loss = 1.1568e-02, PNorm = 137.7783, GNorm = 0.3406, lr_0 = 4.8627e-04
Loss = 1.1472e-02, PNorm = 137.8047, GNorm = 0.2518, lr_0 = 4.8593e-04
Loss = 1.3748e-02, PNorm = 137.8347, GNorm = 0.4012, lr_0 = 4.8560e-04
Loss = 1.3002e-02, PNorm = 137.8708, GNorm = 0.2062, lr_0 = 4.8527e-04
Loss = 1.1890e-02, PNorm = 137.9054, GNorm = 0.2332, lr_0 = 4.8494e-04
Loss = 1.1802e-02, PNorm = 137.9405, GNorm = 0.3372, lr_0 = 4.8460e-04
Loss = 1.0889e-02, PNorm = 137.9733, GNorm = 0.3009, lr_0 = 4.8427e-04
Loss = 1.2770e-02, PNorm = 138.0031, GNorm = 0.8576, lr_0 = 4.8394e-04
Loss = 1.4058e-02, PNorm = 138.0290, GNorm = 0.4669, lr_0 = 4.8361e-04
Loss = 1.3531e-02, PNorm = 138.0573, GNorm = 0.1862, lr_0 = 4.8328e-04
Loss = 1.4725e-02, PNorm = 138.0840, GNorm = 0.7505, lr_0 = 4.8295e-04
Loss = 1.1820e-02, PNorm = 138.1143, GNorm = 0.2609, lr_0 = 4.8262e-04
Loss = 1.1831e-02, PNorm = 138.1435, GNorm = 0.1527, lr_0 = 4.8228e-04
Loss = 1.1431e-02, PNorm = 138.1737, GNorm = 0.5269, lr_0 = 4.8195e-04
Loss = 1.1474e-02, PNorm = 138.2009, GNorm = 0.1221, lr_0 = 4.8162e-04
Loss = 1.1387e-02, PNorm = 138.2262, GNorm = 0.1432, lr_0 = 4.8129e-04
Loss = 1.3383e-02, PNorm = 138.2515, GNorm = 0.1892, lr_0 = 4.8096e-04
Loss = 1.9147e-02, PNorm = 138.2880, GNorm = 0.3080, lr_0 = 4.8064e-04
Loss = 1.3483e-02, PNorm = 138.3233, GNorm = 0.4001, lr_0 = 4.8031e-04
Loss = 1.3029e-02, PNorm = 138.3580, GNorm = 0.2671, lr_0 = 4.7998e-04
Loss = 1.1513e-02, PNorm = 138.3886, GNorm = 0.1261, lr_0 = 4.7965e-04
Loss = 1.1292e-02, PNorm = 138.4202, GNorm = 0.4741, lr_0 = 4.7932e-04
Loss = 1.2908e-02, PNorm = 138.4581, GNorm = 0.2180, lr_0 = 4.7899e-04
Loss = 1.1410e-02, PNorm = 138.4932, GNorm = 0.2550, lr_0 = 4.7866e-04
Loss = 1.5148e-02, PNorm = 138.5233, GNorm = 0.2879, lr_0 = 4.7833e-04
Loss = 1.3213e-02, PNorm = 138.5549, GNorm = 0.2790, lr_0 = 4.7801e-04
Loss = 1.3446e-02, PNorm = 138.5878, GNorm = 0.1750, lr_0 = 4.7768e-04
Loss = 1.0136e-02, PNorm = 138.6180, GNorm = 0.2963, lr_0 = 4.7735e-04
Loss = 1.6106e-02, PNorm = 138.6445, GNorm = 0.2322, lr_0 = 4.7703e-04
Validation mae = 0.479957
Epoch 11
Loss = 9.8385e-03, PNorm = 138.6656, GNorm = 0.2058, lr_0 = 4.7670e-04
Loss = 1.1844e-02, PNorm = 138.6849, GNorm = 0.2336, lr_0 = 4.7637e-04
Loss = 1.2123e-02, PNorm = 138.7116, GNorm = 0.3081, lr_0 = 4.7605e-04
Loss = 1.1610e-02, PNorm = 138.7331, GNorm = 0.4047, lr_0 = 4.7572e-04
Loss = 1.1458e-02, PNorm = 138.7567, GNorm = 0.1444, lr_0 = 4.7539e-04
Loss = 1.1113e-02, PNorm = 138.7737, GNorm = 0.3135, lr_0 = 4.7507e-04
Loss = 8.9408e-03, PNorm = 138.7922, GNorm = 0.1904, lr_0 = 4.7474e-04
Loss = 1.3968e-02, PNorm = 138.8130, GNorm = 0.4084, lr_0 = 4.7442e-04
Loss = 1.2516e-02, PNorm = 138.8332, GNorm = 0.2157, lr_0 = 4.7409e-04
Loss = 1.1665e-02, PNorm = 138.8529, GNorm = 0.3108, lr_0 = 4.7377e-04
Loss = 9.4729e-03, PNorm = 138.8774, GNorm = 0.1680, lr_0 = 4.7344e-04
Loss = 8.3877e-03, PNorm = 138.9002, GNorm = 0.1911, lr_0 = 4.7312e-04
Loss = 8.8387e-03, PNorm = 138.9241, GNorm = 0.1296, lr_0 = 4.7279e-04
Loss = 9.6769e-03, PNorm = 138.9470, GNorm = 0.3339, lr_0 = 4.7247e-04
Loss = 9.0325e-03, PNorm = 138.9698, GNorm = 0.1666, lr_0 = 4.7215e-04
Loss = 9.3342e-03, PNorm = 138.9915, GNorm = 0.1447, lr_0 = 4.7182e-04
Loss = 8.7288e-03, PNorm = 139.0124, GNorm = 0.2858, lr_0 = 4.7150e-04
Loss = 1.0497e-02, PNorm = 139.0332, GNorm = 0.3619, lr_0 = 4.7118e-04
Loss = 8.4046e-03, PNorm = 139.0538, GNorm = 0.1630, lr_0 = 4.7085e-04
Loss = 1.1011e-02, PNorm = 139.0755, GNorm = 0.4100, lr_0 = 4.7053e-04
Loss = 9.0290e-03, PNorm = 139.0971, GNorm = 0.2849, lr_0 = 4.7021e-04
Loss = 9.9315e-03, PNorm = 139.1185, GNorm = 0.2683, lr_0 = 4.6989e-04
Loss = 1.0276e-02, PNorm = 139.1375, GNorm = 0.2120, lr_0 = 4.6957e-04
Loss = 9.8133e-03, PNorm = 139.1591, GNorm = 0.2945, lr_0 = 4.6924e-04
Loss = 1.0132e-02, PNorm = 139.1828, GNorm = 0.1870, lr_0 = 4.6892e-04
Loss = 1.0712e-02, PNorm = 139.2017, GNorm = 0.1272, lr_0 = 4.6860e-04
Loss = 9.5935e-03, PNorm = 139.2275, GNorm = 0.3330, lr_0 = 4.6828e-04
Loss = 1.0352e-02, PNorm = 139.2491, GNorm = 0.1750, lr_0 = 4.6796e-04
Loss = 1.0421e-02, PNorm = 139.2670, GNorm = 0.3752, lr_0 = 4.6764e-04
Loss = 1.2371e-02, PNorm = 139.2866, GNorm = 0.2412, lr_0 = 4.6732e-04
Loss = 1.2055e-02, PNorm = 139.3142, GNorm = 0.1735, lr_0 = 4.6700e-04
Loss = 9.8957e-03, PNorm = 139.3401, GNorm = 0.1132, lr_0 = 4.6668e-04
Loss = 1.1194e-02, PNorm = 139.3652, GNorm = 0.2979, lr_0 = 4.6636e-04
Loss = 1.2453e-02, PNorm = 139.3895, GNorm = 0.5071, lr_0 = 4.6604e-04
Loss = 9.9605e-03, PNorm = 139.4114, GNorm = 0.3626, lr_0 = 4.6572e-04
Loss = 9.8624e-03, PNorm = 139.4325, GNorm = 0.2527, lr_0 = 4.6540e-04
Loss = 1.0597e-02, PNorm = 139.4501, GNorm = 0.3313, lr_0 = 4.6508e-04
Loss = 8.9314e-03, PNorm = 139.4723, GNorm = 0.2755, lr_0 = 4.6476e-04
Loss = 9.9212e-03, PNorm = 139.4908, GNorm = 0.2240, lr_0 = 4.6445e-04
Loss = 1.1813e-02, PNorm = 139.5184, GNorm = 0.8919, lr_0 = 4.6413e-04
Loss = 1.0502e-02, PNorm = 139.5451, GNorm = 0.4523, lr_0 = 4.6381e-04
Loss = 1.0662e-02, PNorm = 139.5696, GNorm = 0.3099, lr_0 = 4.6349e-04
Loss = 9.3315e-03, PNorm = 139.5920, GNorm = 0.1192, lr_0 = 4.6317e-04
Loss = 9.9885e-03, PNorm = 139.6145, GNorm = 0.1199, lr_0 = 4.6286e-04
Loss = 9.4315e-03, PNorm = 139.6352, GNorm = 0.1432, lr_0 = 4.6254e-04
Loss = 8.4568e-03, PNorm = 139.6588, GNorm = 0.3058, lr_0 = 4.6222e-04
Loss = 1.0163e-02, PNorm = 139.6771, GNorm = 0.5334, lr_0 = 4.6191e-04
Loss = 9.6815e-03, PNorm = 139.7017, GNorm = 0.4104, lr_0 = 4.6159e-04
Loss = 9.9975e-03, PNorm = 139.7279, GNorm = 0.4686, lr_0 = 4.6127e-04
Loss = 1.2270e-02, PNorm = 139.7457, GNorm = 0.3316, lr_0 = 4.6096e-04
Loss = 1.1039e-02, PNorm = 139.7736, GNorm = 0.4380, lr_0 = 4.6064e-04
Loss = 1.2253e-02, PNorm = 139.7973, GNorm = 0.3384, lr_0 = 4.6033e-04
Loss = 8.7157e-03, PNorm = 139.8190, GNorm = 0.5291, lr_0 = 4.6001e-04
Loss = 9.4073e-03, PNorm = 139.8395, GNorm = 0.1723, lr_0 = 4.5970e-04
Loss = 1.2411e-02, PNorm = 139.8640, GNorm = 0.3936, lr_0 = 4.5938e-04
Loss = 1.1657e-02, PNorm = 139.8919, GNorm = 0.1519, lr_0 = 4.5907e-04
Loss = 1.0336e-02, PNorm = 139.9158, GNorm = 0.2260, lr_0 = 4.5875e-04
Loss = 8.7251e-03, PNorm = 139.9370, GNorm = 0.1664, lr_0 = 4.5844e-04
Loss = 1.0203e-02, PNorm = 139.9596, GNorm = 0.1268, lr_0 = 4.5812e-04
Loss = 8.8650e-03, PNorm = 139.9808, GNorm = 0.3705, lr_0 = 4.5781e-04
Loss = 1.0516e-02, PNorm = 140.0036, GNorm = 0.3910, lr_0 = 4.5750e-04
Loss = 7.5581e-03, PNorm = 140.0267, GNorm = 0.4275, lr_0 = 4.5718e-04
Loss = 9.8271e-03, PNorm = 140.0500, GNorm = 0.0850, lr_0 = 4.5687e-04
Loss = 1.0028e-02, PNorm = 140.0713, GNorm = 0.3035, lr_0 = 4.5656e-04
Loss = 1.0456e-02, PNorm = 140.0948, GNorm = 0.2018, lr_0 = 4.5624e-04
Loss = 1.2664e-02, PNorm = 140.1235, GNorm = 0.1947, lr_0 = 4.5593e-04
Loss = 1.1234e-02, PNorm = 140.1495, GNorm = 0.3802, lr_0 = 4.5562e-04
Loss = 1.0087e-02, PNorm = 140.1632, GNorm = 0.2540, lr_0 = 4.5531e-04
Loss = 9.6205e-03, PNorm = 140.1867, GNorm = 0.4399, lr_0 = 4.5499e-04
Loss = 1.1683e-02, PNorm = 140.2100, GNorm = 0.4455, lr_0 = 4.5468e-04
Loss = 9.4857e-03, PNorm = 140.2333, GNorm = 0.4272, lr_0 = 4.5437e-04
Loss = 1.1388e-02, PNorm = 140.2567, GNorm = 0.4578, lr_0 = 4.5406e-04
Loss = 9.6633e-03, PNorm = 140.2815, GNorm = 0.5923, lr_0 = 4.5375e-04
Loss = 8.8650e-03, PNorm = 140.3056, GNorm = 0.5884, lr_0 = 4.5344e-04
Loss = 8.9586e-03, PNorm = 140.3244, GNorm = 0.2805, lr_0 = 4.5313e-04
Loss = 1.1986e-02, PNorm = 140.3475, GNorm = 0.3536, lr_0 = 4.5282e-04
Loss = 1.0279e-02, PNorm = 140.3654, GNorm = 0.2288, lr_0 = 4.5251e-04
Loss = 1.1152e-02, PNorm = 140.3904, GNorm = 0.4144, lr_0 = 4.5220e-04
Loss = 1.0549e-02, PNorm = 140.4112, GNorm = 0.1928, lr_0 = 4.5189e-04
Loss = 9.4621e-03, PNorm = 140.4342, GNorm = 0.2341, lr_0 = 4.5158e-04
Loss = 1.4139e-02, PNorm = 140.4595, GNorm = 1.0028, lr_0 = 4.5127e-04
Loss = 9.9306e-03, PNorm = 140.4872, GNorm = 0.1412, lr_0 = 4.5096e-04
Loss = 1.1000e-02, PNorm = 140.5102, GNorm = 0.2905, lr_0 = 4.5065e-04
Loss = 1.5097e-02, PNorm = 140.5354, GNorm = 0.2563, lr_0 = 4.5034e-04
Loss = 1.0453e-02, PNorm = 140.5657, GNorm = 0.3666, lr_0 = 4.5003e-04
Loss = 9.4472e-03, PNorm = 140.5932, GNorm = 0.4031, lr_0 = 4.4972e-04
Loss = 8.4586e-03, PNorm = 140.6194, GNorm = 0.2646, lr_0 = 4.4942e-04
Loss = 1.0613e-02, PNorm = 140.6452, GNorm = 0.3512, lr_0 = 4.4911e-04
Loss = 9.5414e-03, PNorm = 140.6724, GNorm = 0.3355, lr_0 = 4.4880e-04
Loss = 8.0779e-03, PNorm = 140.6943, GNorm = 0.1641, lr_0 = 4.4849e-04
Loss = 8.6669e-03, PNorm = 140.7160, GNorm = 0.1635, lr_0 = 4.4819e-04
Loss = 8.9136e-03, PNorm = 140.7381, GNorm = 0.3315, lr_0 = 4.4788e-04
Loss = 8.2647e-03, PNorm = 140.7644, GNorm = 0.0972, lr_0 = 4.4757e-04
Loss = 1.4151e-02, PNorm = 140.7868, GNorm = 0.3957, lr_0 = 4.4727e-04
Loss = 1.0429e-02, PNorm = 140.8034, GNorm = 0.4403, lr_0 = 4.4696e-04
Loss = 8.7590e-03, PNorm = 140.8271, GNorm = 0.1300, lr_0 = 4.4665e-04
Loss = 1.1579e-02, PNorm = 140.8521, GNorm = 0.3010, lr_0 = 4.4635e-04
Loss = 1.1505e-02, PNorm = 140.8771, GNorm = 0.1857, lr_0 = 4.4604e-04
Loss = 8.6105e-03, PNorm = 140.9045, GNorm = 0.2467, lr_0 = 4.4574e-04
Loss = 9.2550e-03, PNorm = 140.9270, GNorm = 0.1258, lr_0 = 4.4543e-04
Loss = 9.3206e-03, PNorm = 140.9486, GNorm = 0.3744, lr_0 = 4.4513e-04
Loss = 8.9388e-03, PNorm = 140.9709, GNorm = 0.2540, lr_0 = 4.4482e-04
Loss = 1.0381e-02, PNorm = 140.9927, GNorm = 0.1192, lr_0 = 4.4452e-04
Loss = 8.3273e-03, PNorm = 141.0152, GNorm = 0.2036, lr_0 = 4.4421e-04
Loss = 9.4325e-03, PNorm = 141.0372, GNorm = 0.3943, lr_0 = 4.4391e-04
Loss = 1.1782e-02, PNorm = 141.0648, GNorm = 0.1327, lr_0 = 4.4360e-04
Loss = 8.8850e-03, PNorm = 141.0893, GNorm = 0.1860, lr_0 = 4.4330e-04
Loss = 7.9162e-03, PNorm = 141.1094, GNorm = 0.1282, lr_0 = 4.4299e-04
Loss = 1.0469e-02, PNorm = 141.1284, GNorm = 0.1040, lr_0 = 4.4269e-04
Loss = 9.4850e-03, PNorm = 141.1532, GNorm = 0.1838, lr_0 = 4.4239e-04
Loss = 1.0476e-02, PNorm = 141.1833, GNorm = 0.2505, lr_0 = 4.4209e-04
Loss = 1.2202e-02, PNorm = 141.2145, GNorm = 0.4856, lr_0 = 4.4178e-04
Loss = 9.2759e-03, PNorm = 141.2412, GNorm = 0.3307, lr_0 = 4.4148e-04
Loss = 9.5840e-03, PNorm = 141.2627, GNorm = 0.2068, lr_0 = 4.4118e-04
Loss = 1.0083e-02, PNorm = 141.2855, GNorm = 0.2707, lr_0 = 4.4088e-04
Loss = 1.1734e-02, PNorm = 141.3110, GNorm = 0.2544, lr_0 = 4.4057e-04
Loss = 9.5516e-03, PNorm = 141.3341, GNorm = 0.1583, lr_0 = 4.4027e-04
Loss = 8.7022e-03, PNorm = 141.3555, GNorm = 0.3918, lr_0 = 4.3997e-04
Loss = 9.5563e-03, PNorm = 141.3770, GNorm = 0.5308, lr_0 = 4.3967e-04
Loss = 9.1147e-03, PNorm = 141.4026, GNorm = 0.2918, lr_0 = 4.3937e-04
Validation mae = 0.479168
Epoch 12
Loss = 1.0083e-02, PNorm = 141.4218, GNorm = 0.2021, lr_0 = 4.3907e-04
Loss = 9.5587e-03, PNorm = 141.4374, GNorm = 0.2343, lr_0 = 4.3877e-04
Loss = 8.9695e-03, PNorm = 141.4528, GNorm = 0.1537, lr_0 = 4.3846e-04
Loss = 9.5710e-03, PNorm = 141.4705, GNorm = 0.2363, lr_0 = 4.3816e-04
Loss = 8.6372e-03, PNorm = 141.4890, GNorm = 0.1650, lr_0 = 4.3786e-04
Loss = 7.3466e-03, PNorm = 141.5094, GNorm = 0.1618, lr_0 = 4.3756e-04
Loss = 7.6415e-03, PNorm = 141.5235, GNorm = 0.2334, lr_0 = 4.3726e-04
Loss = 9.0368e-03, PNorm = 141.5400, GNorm = 0.1955, lr_0 = 4.3696e-04
Loss = 7.8142e-03, PNorm = 141.5583, GNorm = 0.1515, lr_0 = 4.3667e-04
Loss = 9.0358e-03, PNorm = 141.5735, GNorm = 0.2784, lr_0 = 4.3637e-04
Loss = 8.8303e-03, PNorm = 141.5915, GNorm = 0.3046, lr_0 = 4.3607e-04
Loss = 8.6836e-03, PNorm = 141.6119, GNorm = 0.1952, lr_0 = 4.3577e-04
Loss = 7.8758e-03, PNorm = 141.6276, GNorm = 0.1703, lr_0 = 4.3547e-04
Loss = 1.0522e-02, PNorm = 141.6478, GNorm = 0.2634, lr_0 = 4.3517e-04
Loss = 1.0158e-02, PNorm = 141.6668, GNorm = 0.5797, lr_0 = 4.3487e-04
Loss = 7.4983e-03, PNorm = 141.6857, GNorm = 0.1331, lr_0 = 4.3458e-04
Loss = 8.5462e-03, PNorm = 141.7079, GNorm = 0.1085, lr_0 = 4.3428e-04
Loss = 8.7472e-03, PNorm = 141.7276, GNorm = 0.3286, lr_0 = 4.3398e-04
Loss = 8.4032e-03, PNorm = 141.7441, GNorm = 0.1900, lr_0 = 4.3368e-04
Loss = 9.1720e-03, PNorm = 141.7577, GNorm = 0.3426, lr_0 = 4.3339e-04
Loss = 6.7023e-03, PNorm = 141.7721, GNorm = 0.1240, lr_0 = 4.3309e-04
Loss = 7.9333e-03, PNorm = 141.7891, GNorm = 0.2972, lr_0 = 4.3279e-04
Loss = 7.6729e-03, PNorm = 141.8079, GNorm = 0.3421, lr_0 = 4.3250e-04
Loss = 8.2997e-03, PNorm = 141.8221, GNorm = 0.5826, lr_0 = 4.3220e-04
Loss = 7.5617e-03, PNorm = 141.8390, GNorm = 0.1711, lr_0 = 4.3190e-04
Loss = 7.8498e-03, PNorm = 141.8568, GNorm = 0.4379, lr_0 = 4.3161e-04
Loss = 7.9575e-03, PNorm = 141.8751, GNorm = 0.3479, lr_0 = 4.3131e-04
Loss = 7.7658e-03, PNorm = 141.8888, GNorm = 0.2685, lr_0 = 4.3102e-04
Loss = 8.9049e-03, PNorm = 141.9074, GNorm = 0.2966, lr_0 = 4.3072e-04
Loss = 9.1713e-03, PNorm = 141.9291, GNorm = 0.1412, lr_0 = 4.3043e-04
Loss = 8.2304e-03, PNorm = 141.9511, GNorm = 0.6032, lr_0 = 4.3013e-04
Loss = 8.0488e-03, PNorm = 141.9689, GNorm = 0.3479, lr_0 = 4.2984e-04
Loss = 7.7996e-03, PNorm = 141.9855, GNorm = 0.1714, lr_0 = 4.2954e-04
Loss = 9.1807e-03, PNorm = 142.0071, GNorm = 0.3768, lr_0 = 4.2925e-04
Loss = 7.4665e-03, PNorm = 142.0260, GNorm = 0.3105, lr_0 = 4.2895e-04
Loss = 7.9805e-03, PNorm = 142.0447, GNorm = 0.5738, lr_0 = 4.2866e-04
Loss = 8.2857e-03, PNorm = 142.0636, GNorm = 0.2058, lr_0 = 4.2837e-04
Loss = 8.8125e-03, PNorm = 142.0817, GNorm = 0.3030, lr_0 = 4.2807e-04
Loss = 8.3814e-03, PNorm = 142.1050, GNorm = 0.3325, lr_0 = 4.2778e-04
Loss = 1.1925e-02, PNorm = 142.1275, GNorm = 0.1635, lr_0 = 4.2749e-04
Loss = 9.1878e-03, PNorm = 142.1475, GNorm = 0.2274, lr_0 = 4.2719e-04
Loss = 1.0520e-02, PNorm = 142.1710, GNorm = 0.2377, lr_0 = 4.2690e-04
Loss = 8.4489e-03, PNorm = 142.1909, GNorm = 0.1629, lr_0 = 4.2661e-04
Loss = 7.5245e-03, PNorm = 142.2149, GNorm = 0.2505, lr_0 = 4.2632e-04
Loss = 1.0791e-02, PNorm = 142.2293, GNorm = 0.2669, lr_0 = 4.2602e-04
Loss = 8.0475e-03, PNorm = 142.2478, GNorm = 0.4445, lr_0 = 4.2573e-04
Loss = 9.1820e-03, PNorm = 142.2622, GNorm = 0.3579, lr_0 = 4.2544e-04
Loss = 8.3376e-03, PNorm = 142.2815, GNorm = 0.2055, lr_0 = 4.2515e-04
Loss = 8.6245e-03, PNorm = 142.3068, GNorm = 0.2368, lr_0 = 4.2486e-04
Loss = 7.9684e-03, PNorm = 142.3229, GNorm = 0.3637, lr_0 = 4.2457e-04
Loss = 7.7482e-03, PNorm = 142.3434, GNorm = 0.5756, lr_0 = 4.2428e-04
Loss = 8.1115e-03, PNorm = 142.3604, GNorm = 0.2803, lr_0 = 4.2399e-04
Loss = 7.6593e-03, PNorm = 142.3793, GNorm = 0.1281, lr_0 = 4.2370e-04
Loss = 8.3248e-03, PNorm = 142.3979, GNorm = 0.2346, lr_0 = 4.2340e-04
Loss = 9.7950e-03, PNorm = 142.4150, GNorm = 0.4070, lr_0 = 4.2311e-04
Loss = 9.5625e-03, PNorm = 142.4268, GNorm = 0.2413, lr_0 = 4.2283e-04
Loss = 8.0109e-03, PNorm = 142.4429, GNorm = 0.4427, lr_0 = 4.2254e-04
Loss = 8.2750e-03, PNorm = 142.4658, GNorm = 0.2558, lr_0 = 4.2225e-04
Loss = 8.9038e-03, PNorm = 142.4878, GNorm = 0.3198, lr_0 = 4.2196e-04
Loss = 7.5062e-03, PNorm = 142.5098, GNorm = 0.4321, lr_0 = 4.2167e-04
Loss = 8.2029e-03, PNorm = 142.5299, GNorm = 0.3793, lr_0 = 4.2138e-04
Loss = 8.6796e-03, PNorm = 142.5505, GNorm = 0.3620, lr_0 = 4.2109e-04
Loss = 7.2605e-03, PNorm = 142.5702, GNorm = 0.1560, lr_0 = 4.2080e-04
Loss = 8.5809e-03, PNorm = 142.5879, GNorm = 0.7359, lr_0 = 4.2051e-04
Loss = 8.0986e-03, PNorm = 142.6062, GNorm = 0.1353, lr_0 = 4.2023e-04
Loss = 8.8568e-03, PNorm = 142.6258, GNorm = 0.3803, lr_0 = 4.1994e-04
Loss = 8.8813e-03, PNorm = 142.6475, GNorm = 0.2935, lr_0 = 4.1965e-04
Loss = 1.1133e-02, PNorm = 142.6651, GNorm = 0.1182, lr_0 = 4.1936e-04
Loss = 8.8890e-03, PNorm = 142.6793, GNorm = 0.3785, lr_0 = 4.1907e-04
Loss = 8.7299e-03, PNorm = 142.6997, GNorm = 0.3065, lr_0 = 4.1879e-04
Loss = 9.6013e-03, PNorm = 142.7185, GNorm = 0.4185, lr_0 = 4.1850e-04
Loss = 7.8394e-03, PNorm = 142.7422, GNorm = 0.0849, lr_0 = 4.1821e-04
Loss = 1.3005e-02, PNorm = 142.7618, GNorm = 0.3969, lr_0 = 4.1793e-04
Loss = 7.8042e-03, PNorm = 142.7834, GNorm = 0.1696, lr_0 = 4.1764e-04
Loss = 8.8113e-03, PNorm = 142.8022, GNorm = 0.2901, lr_0 = 4.1736e-04
Loss = 8.4583e-03, PNorm = 142.8210, GNorm = 0.2733, lr_0 = 4.1707e-04
Loss = 8.5210e-03, PNorm = 142.8425, GNorm = 0.1820, lr_0 = 4.1678e-04
Loss = 7.3518e-03, PNorm = 142.8655, GNorm = 0.2413, lr_0 = 4.1650e-04
Loss = 1.1020e-02, PNorm = 142.8854, GNorm = 0.1859, lr_0 = 4.1621e-04
Loss = 7.5321e-03, PNorm = 142.9034, GNorm = 0.2209, lr_0 = 4.1593e-04
Loss = 8.1525e-03, PNorm = 142.9181, GNorm = 0.0952, lr_0 = 4.1564e-04
Loss = 9.8430e-03, PNorm = 142.9373, GNorm = 0.2863, lr_0 = 4.1536e-04
Loss = 8.5380e-03, PNorm = 142.9632, GNorm = 0.7211, lr_0 = 4.1507e-04
Loss = 1.3631e-02, PNorm = 142.9869, GNorm = 0.5640, lr_0 = 4.1479e-04
Loss = 7.5829e-03, PNorm = 143.0145, GNorm = 0.3285, lr_0 = 4.1450e-04
Loss = 8.9572e-03, PNorm = 143.0367, GNorm = 0.2100, lr_0 = 4.1422e-04
Loss = 9.8980e-03, PNorm = 143.0549, GNorm = 0.1841, lr_0 = 4.1394e-04
Loss = 8.8875e-03, PNorm = 143.0731, GNorm = 0.1509, lr_0 = 4.1365e-04
Loss = 9.7167e-03, PNorm = 143.0979, GNorm = 0.1027, lr_0 = 4.1337e-04
Loss = 9.0823e-03, PNorm = 143.1198, GNorm = 0.1681, lr_0 = 4.1309e-04
Loss = 9.3294e-03, PNorm = 143.1422, GNorm = 0.2114, lr_0 = 4.1280e-04
Loss = 7.4072e-03, PNorm = 143.1670, GNorm = 0.2560, lr_0 = 4.1252e-04
Loss = 1.0688e-02, PNorm = 143.1901, GNorm = 0.2258, lr_0 = 4.1224e-04
Loss = 8.0981e-03, PNorm = 143.2149, GNorm = 0.4957, lr_0 = 4.1196e-04
Loss = 1.2223e-02, PNorm = 143.2369, GNorm = 0.4143, lr_0 = 4.1167e-04
Loss = 9.1049e-03, PNorm = 143.2578, GNorm = 0.2966, lr_0 = 4.1139e-04
Loss = 8.6354e-03, PNorm = 143.2778, GNorm = 0.2926, lr_0 = 4.1111e-04
Loss = 7.9351e-03, PNorm = 143.3039, GNorm = 0.4464, lr_0 = 4.1083e-04
Loss = 9.4552e-03, PNorm = 143.3297, GNorm = 0.2180, lr_0 = 4.1055e-04
Loss = 9.7430e-03, PNorm = 143.3475, GNorm = 0.2352, lr_0 = 4.1027e-04
Loss = 9.6276e-03, PNorm = 143.3637, GNorm = 0.2055, lr_0 = 4.0998e-04
Loss = 9.4181e-03, PNorm = 143.3843, GNorm = 0.1845, lr_0 = 4.0970e-04
Loss = 1.0417e-02, PNorm = 143.4067, GNorm = 0.2712, lr_0 = 4.0942e-04
Loss = 7.8245e-03, PNorm = 143.4292, GNorm = 0.2800, lr_0 = 4.0914e-04
Loss = 1.0143e-02, PNorm = 143.4517, GNorm = 0.2892, lr_0 = 4.0886e-04
Loss = 8.3270e-03, PNorm = 143.4752, GNorm = 0.1299, lr_0 = 4.0858e-04
Loss = 1.0309e-02, PNorm = 143.4965, GNorm = 0.1146, lr_0 = 4.0830e-04
Loss = 8.4343e-03, PNorm = 143.5169, GNorm = 0.2573, lr_0 = 4.0802e-04
Loss = 9.7635e-03, PNorm = 143.5376, GNorm = 0.1339, lr_0 = 4.0774e-04
Loss = 1.2216e-02, PNorm = 143.5570, GNorm = 0.1670, lr_0 = 4.0746e-04
Loss = 1.0966e-02, PNorm = 143.5763, GNorm = 0.3776, lr_0 = 4.0718e-04
Loss = 8.3689e-03, PNorm = 143.5988, GNorm = 0.7187, lr_0 = 4.0691e-04
Loss = 8.6149e-03, PNorm = 143.6194, GNorm = 0.2089, lr_0 = 4.0663e-04
Loss = 9.9519e-03, PNorm = 143.6456, GNorm = 0.4209, lr_0 = 4.0635e-04
Loss = 1.0487e-02, PNorm = 143.6674, GNorm = 0.4605, lr_0 = 4.0607e-04
Loss = 8.9247e-03, PNorm = 143.6900, GNorm = 0.2715, lr_0 = 4.0579e-04
Loss = 8.7329e-03, PNorm = 143.7108, GNorm = 0.4996, lr_0 = 4.0551e-04
Loss = 9.4310e-03, PNorm = 143.7374, GNorm = 0.2289, lr_0 = 4.0524e-04
Loss = 7.7414e-03, PNorm = 143.7607, GNorm = 0.2990, lr_0 = 4.0496e-04
Loss = 8.9169e-03, PNorm = 143.7817, GNorm = 0.0855, lr_0 = 4.0468e-04
Validation mae = 0.478556
Epoch 13
Loss = 7.4280e-03, PNorm = 143.8012, GNorm = 0.2528, lr_0 = 4.0440e-04
Loss = 8.6186e-03, PNorm = 143.8180, GNorm = 0.1572, lr_0 = 4.0413e-04
Loss = 9.1278e-03, PNorm = 143.8325, GNorm = 0.1803, lr_0 = 4.0385e-04
Loss = 6.4498e-03, PNorm = 143.8472, GNorm = 0.1806, lr_0 = 4.0357e-04
Loss = 6.3247e-03, PNorm = 143.8621, GNorm = 0.3005, lr_0 = 4.0330e-04
Loss = 9.6949e-03, PNorm = 143.8777, GNorm = 0.4911, lr_0 = 4.0302e-04
Loss = 6.1437e-03, PNorm = 143.8899, GNorm = 0.5395, lr_0 = 4.0274e-04
Loss = 1.0680e-02, PNorm = 143.9014, GNorm = 0.2516, lr_0 = 4.0247e-04
Loss = 7.3441e-03, PNorm = 143.9216, GNorm = 0.1475, lr_0 = 4.0219e-04
Loss = 7.1594e-03, PNorm = 143.9384, GNorm = 0.3085, lr_0 = 4.0192e-04
Loss = 7.1416e-03, PNorm = 143.9533, GNorm = 0.1719, lr_0 = 4.0164e-04
Loss = 7.3969e-03, PNorm = 143.9700, GNorm = 0.1704, lr_0 = 4.0137e-04
Loss = 8.5474e-03, PNorm = 143.9860, GNorm = 0.1792, lr_0 = 4.0109e-04
Loss = 6.2833e-03, PNorm = 143.9975, GNorm = 0.1191, lr_0 = 4.0082e-04
Loss = 6.7597e-03, PNorm = 144.0106, GNorm = 0.1284, lr_0 = 4.0054e-04
Loss = 7.8629e-03, PNorm = 144.0230, GNorm = 0.1944, lr_0 = 4.0027e-04
Loss = 8.1429e-03, PNorm = 144.0376, GNorm = 0.2521, lr_0 = 3.9999e-04
Loss = 7.6209e-03, PNorm = 144.0528, GNorm = 0.1718, lr_0 = 3.9972e-04
Loss = 6.8775e-03, PNorm = 144.0665, GNorm = 0.1777, lr_0 = 3.9945e-04
Loss = 6.0768e-03, PNorm = 144.0775, GNorm = 0.1344, lr_0 = 3.9917e-04
Loss = 7.5573e-03, PNorm = 144.0891, GNorm = 0.5546, lr_0 = 3.9890e-04
Loss = 7.1954e-03, PNorm = 144.1018, GNorm = 0.3175, lr_0 = 3.9863e-04
Loss = 6.2878e-03, PNorm = 144.1170, GNorm = 0.1868, lr_0 = 3.9835e-04
Loss = 6.8160e-03, PNorm = 144.1337, GNorm = 0.1995, lr_0 = 3.9808e-04
Loss = 5.7299e-03, PNorm = 144.1537, GNorm = 0.4007, lr_0 = 3.9781e-04
Loss = 6.7151e-03, PNorm = 144.1683, GNorm = 0.2165, lr_0 = 3.9753e-04
Loss = 7.6938e-03, PNorm = 144.1791, GNorm = 0.4605, lr_0 = 3.9726e-04
Loss = 7.2635e-03, PNorm = 144.1892, GNorm = 0.3598, lr_0 = 3.9699e-04
Loss = 7.9226e-03, PNorm = 144.2021, GNorm = 0.3871, lr_0 = 3.9672e-04
Loss = 7.1026e-03, PNorm = 144.2175, GNorm = 0.1549, lr_0 = 3.9645e-04
Loss = 8.1494e-03, PNorm = 144.2339, GNorm = 0.1901, lr_0 = 3.9617e-04
Loss = 6.7019e-03, PNorm = 144.2554, GNorm = 0.1117, lr_0 = 3.9590e-04
Loss = 8.1089e-03, PNorm = 144.2718, GNorm = 0.3319, lr_0 = 3.9563e-04
Loss = 7.2958e-03, PNorm = 144.2850, GNorm = 0.2058, lr_0 = 3.9536e-04
Loss = 7.6533e-03, PNorm = 144.2979, GNorm = 0.2797, lr_0 = 3.9509e-04
Loss = 6.1515e-03, PNorm = 144.3120, GNorm = 0.1678, lr_0 = 3.9482e-04
Loss = 6.5949e-03, PNorm = 144.3254, GNorm = 0.0860, lr_0 = 3.9455e-04
Loss = 7.9746e-03, PNorm = 144.3385, GNorm = 0.2393, lr_0 = 3.9428e-04
Loss = 6.0555e-03, PNorm = 144.3560, GNorm = 0.1951, lr_0 = 3.9401e-04
Loss = 8.6673e-03, PNorm = 144.3746, GNorm = 0.2211, lr_0 = 3.9374e-04
Loss = 9.0080e-03, PNorm = 144.3903, GNorm = 0.2585, lr_0 = 3.9347e-04
Loss = 6.8728e-03, PNorm = 144.4048, GNorm = 0.2855, lr_0 = 3.9320e-04
Loss = 6.9848e-03, PNorm = 144.4185, GNorm = 0.2565, lr_0 = 3.9293e-04
Loss = 6.2955e-03, PNorm = 144.4360, GNorm = 0.2602, lr_0 = 3.9266e-04
Loss = 6.9692e-03, PNorm = 144.4515, GNorm = 0.2041, lr_0 = 3.9239e-04
Loss = 1.1167e-02, PNorm = 144.4660, GNorm = 1.2758, lr_0 = 3.9212e-04
Loss = 9.5150e-03, PNorm = 144.4798, GNorm = 0.1835, lr_0 = 3.9185e-04
Loss = 7.8371e-03, PNorm = 144.4962, GNorm = 0.3028, lr_0 = 3.9159e-04
Loss = 8.4699e-03, PNorm = 144.5133, GNorm = 0.4577, lr_0 = 3.9132e-04
Loss = 8.9193e-03, PNorm = 144.5316, GNorm = 0.1994, lr_0 = 3.9105e-04
Loss = 7.9425e-03, PNorm = 144.5511, GNorm = 0.1744, lr_0 = 3.9078e-04
Loss = 6.6631e-03, PNorm = 144.5681, GNorm = 0.1187, lr_0 = 3.9051e-04
Loss = 6.8348e-03, PNorm = 144.5822, GNorm = 0.1133, lr_0 = 3.9025e-04
Loss = 1.0133e-02, PNorm = 144.6012, GNorm = 0.2047, lr_0 = 3.8998e-04
Loss = 8.2670e-03, PNorm = 144.6166, GNorm = 0.2001, lr_0 = 3.8971e-04
Loss = 5.9358e-03, PNorm = 144.6313, GNorm = 0.1047, lr_0 = 3.8945e-04
Loss = 8.8970e-03, PNorm = 144.6469, GNorm = 0.4617, lr_0 = 3.8918e-04
Loss = 6.7893e-03, PNorm = 144.6586, GNorm = 0.1922, lr_0 = 3.8891e-04
Loss = 6.9490e-03, PNorm = 144.6756, GNorm = 0.2730, lr_0 = 3.8865e-04
Loss = 6.9820e-03, PNorm = 144.6924, GNorm = 0.3964, lr_0 = 3.8838e-04
Loss = 7.0734e-03, PNorm = 144.7102, GNorm = 0.1492, lr_0 = 3.8811e-04
Loss = 7.0803e-03, PNorm = 144.7279, GNorm = 0.1380, lr_0 = 3.8785e-04
Loss = 7.3595e-03, PNorm = 144.7441, GNorm = 0.3558, lr_0 = 3.8758e-04
Loss = 7.4584e-03, PNorm = 144.7563, GNorm = 0.4317, lr_0 = 3.8732e-04
Loss = 7.3072e-03, PNorm = 144.7743, GNorm = 0.2326, lr_0 = 3.8705e-04
Loss = 8.5138e-03, PNorm = 144.7867, GNorm = 0.1523, lr_0 = 3.8679e-04
Loss = 7.6653e-03, PNorm = 144.8003, GNorm = 0.0966, lr_0 = 3.8652e-04
Loss = 7.0730e-03, PNorm = 144.8162, GNorm = 0.1931, lr_0 = 3.8626e-04
Loss = 6.8641e-03, PNorm = 144.8319, GNorm = 0.1541, lr_0 = 3.8599e-04
Loss = 8.0664e-03, PNorm = 144.8474, GNorm = 0.0945, lr_0 = 3.8573e-04
Loss = 6.5729e-03, PNorm = 144.8686, GNorm = 0.3015, lr_0 = 3.8546e-04
Loss = 7.3891e-03, PNorm = 144.8853, GNorm = 0.3216, lr_0 = 3.8520e-04
Loss = 6.9233e-03, PNorm = 144.9052, GNorm = 0.2270, lr_0 = 3.8493e-04
Loss = 7.6147e-03, PNorm = 144.9179, GNorm = 0.2493, lr_0 = 3.8467e-04
Loss = 6.4711e-03, PNorm = 144.9304, GNorm = 0.1449, lr_0 = 3.8441e-04
Loss = 8.2850e-03, PNorm = 144.9450, GNorm = 0.0921, lr_0 = 3.8414e-04
Loss = 6.4447e-03, PNorm = 144.9662, GNorm = 0.1402, lr_0 = 3.8388e-04
Loss = 6.1328e-03, PNorm = 144.9842, GNorm = 0.2588, lr_0 = 3.8362e-04
Loss = 8.1491e-03, PNorm = 144.9953, GNorm = 0.2907, lr_0 = 3.8336e-04
Loss = 7.8124e-03, PNorm = 145.0096, GNorm = 0.1962, lr_0 = 3.8309e-04
Loss = 7.2159e-03, PNorm = 145.0285, GNorm = 0.1226, lr_0 = 3.8283e-04
Loss = 7.1732e-03, PNorm = 145.0483, GNorm = 0.0917, lr_0 = 3.8257e-04
Loss = 6.2188e-03, PNorm = 145.0647, GNorm = 0.3401, lr_0 = 3.8231e-04
Loss = 6.7509e-03, PNorm = 145.0826, GNorm = 0.1170, lr_0 = 3.8204e-04
Loss = 6.2689e-03, PNorm = 145.0999, GNorm = 0.2627, lr_0 = 3.8178e-04
Loss = 7.7631e-03, PNorm = 145.1144, GNorm = 0.1526, lr_0 = 3.8152e-04
Loss = 7.4671e-03, PNorm = 145.1299, GNorm = 0.1329, lr_0 = 3.8126e-04
Loss = 7.2447e-03, PNorm = 145.1462, GNorm = 0.1986, lr_0 = 3.8100e-04
Loss = 7.3491e-03, PNorm = 145.1624, GNorm = 0.4211, lr_0 = 3.8074e-04
Loss = 7.8435e-03, PNorm = 145.1803, GNorm = 0.2674, lr_0 = 3.8048e-04
Loss = 5.7495e-03, PNorm = 145.1958, GNorm = 0.1298, lr_0 = 3.8022e-04
Loss = 7.0881e-03, PNorm = 145.2144, GNorm = 0.1645, lr_0 = 3.7995e-04
Loss = 6.5290e-03, PNorm = 145.2343, GNorm = 0.4175, lr_0 = 3.7969e-04
Loss = 7.4778e-03, PNorm = 145.2514, GNorm = 0.2921, lr_0 = 3.7943e-04
Loss = 6.8478e-03, PNorm = 145.2698, GNorm = 0.2820, lr_0 = 3.7917e-04
Loss = 5.7999e-03, PNorm = 145.2853, GNorm = 0.3863, lr_0 = 3.7891e-04
Loss = 8.5714e-03, PNorm = 145.3039, GNorm = 0.3913, lr_0 = 3.7866e-04
Loss = 6.3070e-03, PNorm = 145.3214, GNorm = 0.1187, lr_0 = 3.7840e-04
Loss = 8.8515e-03, PNorm = 145.3411, GNorm = 0.2716, lr_0 = 3.7814e-04
Loss = 9.0939e-03, PNorm = 145.3581, GNorm = 0.2309, lr_0 = 3.7788e-04
Loss = 6.5976e-03, PNorm = 145.3764, GNorm = 0.1680, lr_0 = 3.7762e-04
Loss = 7.9981e-03, PNorm = 145.3906, GNorm = 0.1345, lr_0 = 3.7736e-04
Loss = 8.9279e-03, PNorm = 145.4089, GNorm = 0.3130, lr_0 = 3.7710e-04
Loss = 9.5466e-03, PNorm = 145.4243, GNorm = 0.3718, lr_0 = 3.7684e-04
Loss = 6.9777e-03, PNorm = 145.4417, GNorm = 0.3643, lr_0 = 3.7659e-04
Loss = 6.4296e-03, PNorm = 145.4614, GNorm = 0.2019, lr_0 = 3.7633e-04
Loss = 1.1154e-02, PNorm = 145.4857, GNorm = 0.4771, lr_0 = 3.7607e-04
Loss = 7.2226e-03, PNorm = 145.5053, GNorm = 0.4928, lr_0 = 3.7581e-04
Loss = 7.7055e-03, PNorm = 145.5243, GNorm = 0.1358, lr_0 = 3.7555e-04
Loss = 7.9667e-03, PNorm = 145.5372, GNorm = 0.1684, lr_0 = 3.7530e-04
Loss = 6.9889e-03, PNorm = 145.5535, GNorm = 0.2024, lr_0 = 3.7504e-04
Loss = 8.1902e-03, PNorm = 145.5712, GNorm = 0.1770, lr_0 = 3.7478e-04
Loss = 6.3419e-03, PNorm = 145.5845, GNorm = 0.0763, lr_0 = 3.7453e-04
Loss = 7.8700e-03, PNorm = 145.5989, GNorm = 0.2001, lr_0 = 3.7427e-04
Loss = 8.4895e-03, PNorm = 145.6162, GNorm = 0.1084, lr_0 = 3.7401e-04
Loss = 6.9546e-03, PNorm = 145.6382, GNorm = 0.2131, lr_0 = 3.7376e-04
Loss = 7.6640e-03, PNorm = 145.6593, GNorm = 0.3340, lr_0 = 3.7350e-04
Loss = 7.1056e-03, PNorm = 145.6828, GNorm = 0.1930, lr_0 = 3.7325e-04
Loss = 8.4028e-03, PNorm = 145.7030, GNorm = 0.1701, lr_0 = 3.7299e-04
Loss = 8.2248e-03, PNorm = 145.7202, GNorm = 0.1327, lr_0 = 3.7273e-04
Validation mae = 0.477975
Epoch 14
Loss = 6.2493e-03, PNorm = 145.7357, GNorm = 0.1278, lr_0 = 3.7248e-04
Loss = 6.5708e-03, PNorm = 145.7451, GNorm = 0.2317, lr_0 = 3.7222e-04
Loss = 7.8846e-03, PNorm = 145.7535, GNorm = 0.2287, lr_0 = 3.7197e-04
Loss = 5.5269e-03, PNorm = 145.7660, GNorm = 0.0732, lr_0 = 3.7171e-04
Loss = 6.2110e-03, PNorm = 145.7786, GNorm = 0.0818, lr_0 = 3.7146e-04
Loss = 7.6698e-03, PNorm = 145.7916, GNorm = 0.1770, lr_0 = 3.7120e-04
Loss = 5.7187e-03, PNorm = 145.8078, GNorm = 0.2264, lr_0 = 3.7095e-04
Loss = 7.3685e-03, PNorm = 145.8229, GNorm = 0.2673, lr_0 = 3.7070e-04
Loss = 5.9598e-03, PNorm = 145.8404, GNorm = 0.2410, lr_0 = 3.7044e-04
Loss = 8.1724e-03, PNorm = 145.8569, GNorm = 0.3282, lr_0 = 3.7019e-04
Loss = 5.3123e-03, PNorm = 145.8696, GNorm = 0.2605, lr_0 = 3.6993e-04
Loss = 6.3116e-03, PNorm = 145.8787, GNorm = 0.0761, lr_0 = 3.6968e-04
Loss = 6.8611e-03, PNorm = 145.8895, GNorm = 0.2249, lr_0 = 3.6943e-04
Loss = 6.0703e-03, PNorm = 145.9072, GNorm = 0.0967, lr_0 = 3.6917e-04
Loss = 6.4659e-03, PNorm = 145.9220, GNorm = 0.2276, lr_0 = 3.6892e-04
Loss = 5.9191e-03, PNorm = 145.9322, GNorm = 0.2649, lr_0 = 3.6867e-04
Loss = 6.5236e-03, PNorm = 145.9437, GNorm = 0.2034, lr_0 = 3.6842e-04
Loss = 6.6687e-03, PNorm = 145.9545, GNorm = 0.1271, lr_0 = 3.6816e-04
Loss = 6.5055e-03, PNorm = 145.9704, GNorm = 0.3010, lr_0 = 3.6791e-04
Loss = 7.1918e-03, PNorm = 145.9843, GNorm = 0.1925, lr_0 = 3.6766e-04
Loss = 6.9315e-03, PNorm = 145.9964, GNorm = 0.4526, lr_0 = 3.6741e-04
Loss = 5.7972e-03, PNorm = 146.0087, GNorm = 0.1591, lr_0 = 3.6716e-04
Loss = 5.4167e-03, PNorm = 146.0166, GNorm = 0.1476, lr_0 = 3.6690e-04
Loss = 5.9258e-03, PNorm = 146.0265, GNorm = 0.3457, lr_0 = 3.6665e-04
Loss = 9.0482e-03, PNorm = 146.0376, GNorm = 0.4570, lr_0 = 3.6640e-04
Loss = 6.4764e-03, PNorm = 146.0494, GNorm = 0.1493, lr_0 = 3.6615e-04
Loss = 5.5619e-03, PNorm = 146.0598, GNorm = 0.1288, lr_0 = 3.6590e-04
Loss = 5.4137e-03, PNorm = 146.0753, GNorm = 0.3269, lr_0 = 3.6565e-04
Loss = 6.1736e-03, PNorm = 146.0901, GNorm = 0.1700, lr_0 = 3.6540e-04
Loss = 7.6964e-03, PNorm = 146.1062, GNorm = 0.2531, lr_0 = 3.6515e-04
Loss = 5.7787e-03, PNorm = 146.1147, GNorm = 0.1085, lr_0 = 3.6490e-04
Loss = 4.5039e-03, PNorm = 146.1246, GNorm = 0.2667, lr_0 = 3.6465e-04
Loss = 5.7004e-03, PNorm = 146.1394, GNorm = 0.1647, lr_0 = 3.6440e-04
Loss = 5.9071e-03, PNorm = 146.1552, GNorm = 0.3213, lr_0 = 3.6415e-04
Loss = 5.8713e-03, PNorm = 146.1678, GNorm = 0.0869, lr_0 = 3.6390e-04
Loss = 6.6910e-03, PNorm = 146.1811, GNorm = 0.1002, lr_0 = 3.6365e-04
Loss = 5.4799e-03, PNorm = 146.1925, GNorm = 0.1713, lr_0 = 3.6340e-04
Loss = 5.1322e-03, PNorm = 146.2043, GNorm = 0.2749, lr_0 = 3.6315e-04
Loss = 6.1650e-03, PNorm = 146.2194, GNorm = 0.3717, lr_0 = 3.6290e-04
Loss = 5.6858e-03, PNorm = 146.2279, GNorm = 0.2213, lr_0 = 3.6266e-04
Loss = 8.1519e-03, PNorm = 146.2369, GNorm = 0.3535, lr_0 = 3.6241e-04
Loss = 5.3094e-03, PNorm = 146.2471, GNorm = 0.4216, lr_0 = 3.6216e-04
Loss = 6.8884e-03, PNorm = 146.2603, GNorm = 0.1847, lr_0 = 3.6191e-04
Loss = 6.8802e-03, PNorm = 146.2751, GNorm = 0.2830, lr_0 = 3.6166e-04
Loss = 7.2868e-03, PNorm = 146.2879, GNorm = 0.2551, lr_0 = 3.6141e-04
Loss = 5.7223e-03, PNorm = 146.2981, GNorm = 0.1360, lr_0 = 3.6117e-04
Loss = 6.0502e-03, PNorm = 146.3106, GNorm = 0.1751, lr_0 = 3.6092e-04
Loss = 5.6988e-03, PNorm = 146.3254, GNorm = 0.3313, lr_0 = 3.6067e-04
Loss = 7.1067e-03, PNorm = 146.3400, GNorm = 0.0723, lr_0 = 3.6043e-04
Loss = 6.0672e-03, PNorm = 146.3549, GNorm = 0.4356, lr_0 = 3.6018e-04
Loss = 8.4120e-03, PNorm = 146.3685, GNorm = 0.2068, lr_0 = 3.5993e-04
Loss = 6.0194e-03, PNorm = 146.3818, GNorm = 0.2099, lr_0 = 3.5969e-04
Loss = 8.6950e-03, PNorm = 146.3932, GNorm = 0.9345, lr_0 = 3.5944e-04
Loss = 6.3135e-03, PNorm = 146.4043, GNorm = 0.5347, lr_0 = 3.5919e-04
Loss = 5.6611e-03, PNorm = 146.4146, GNorm = 0.1914, lr_0 = 3.5895e-04
Loss = 6.8510e-03, PNorm = 146.4245, GNorm = 0.3823, lr_0 = 3.5870e-04
Loss = 6.0432e-03, PNorm = 146.4437, GNorm = 0.2874, lr_0 = 3.5845e-04
Loss = 6.4752e-03, PNorm = 146.4609, GNorm = 0.1772, lr_0 = 3.5821e-04
Loss = 7.3148e-03, PNorm = 146.4778, GNorm = 0.3498, lr_0 = 3.5796e-04
Loss = 6.9979e-03, PNorm = 146.4951, GNorm = 0.1059, lr_0 = 3.5772e-04
Loss = 5.6426e-03, PNorm = 146.5126, GNorm = 0.1812, lr_0 = 3.5747e-04
Loss = 5.6027e-03, PNorm = 146.5288, GNorm = 0.1715, lr_0 = 3.5723e-04
Loss = 6.2351e-03, PNorm = 146.5397, GNorm = 0.1209, lr_0 = 3.5698e-04
Loss = 6.9629e-03, PNorm = 146.5501, GNorm = 0.1649, lr_0 = 3.5674e-04
Loss = 5.2425e-03, PNorm = 146.5639, GNorm = 0.0660, lr_0 = 3.5650e-04
Loss = 6.0255e-03, PNorm = 146.5767, GNorm = 0.3112, lr_0 = 3.5625e-04
Loss = 8.3995e-03, PNorm = 146.5879, GNorm = 0.1674, lr_0 = 3.5601e-04
Loss = 5.7663e-03, PNorm = 146.6011, GNorm = 0.2619, lr_0 = 3.5576e-04
Loss = 7.6947e-03, PNorm = 146.6152, GNorm = 0.1160, lr_0 = 3.5552e-04
Loss = 5.6907e-03, PNorm = 146.6346, GNorm = 0.2733, lr_0 = 3.5528e-04
Loss = 6.5818e-03, PNorm = 146.6485, GNorm = 0.4035, lr_0 = 3.5503e-04
Loss = 6.0270e-03, PNorm = 146.6613, GNorm = 0.2115, lr_0 = 3.5479e-04
Loss = 5.3687e-03, PNorm = 146.6731, GNorm = 0.1828, lr_0 = 3.5455e-04
Loss = 6.4871e-03, PNorm = 146.6859, GNorm = 0.1605, lr_0 = 3.5430e-04
Loss = 7.3076e-03, PNorm = 146.7018, GNorm = 0.0962, lr_0 = 3.5406e-04
Loss = 7.2955e-03, PNorm = 146.7165, GNorm = 0.1441, lr_0 = 3.5382e-04
Loss = 5.0395e-03, PNorm = 146.7331, GNorm = 0.1352, lr_0 = 3.5358e-04
Loss = 6.2079e-03, PNorm = 146.7462, GNorm = 0.4175, lr_0 = 3.5333e-04
Loss = 6.3844e-03, PNorm = 146.7539, GNorm = 0.3518, lr_0 = 3.5309e-04
Loss = 6.1120e-03, PNorm = 146.7693, GNorm = 0.1249, lr_0 = 3.5285e-04
Loss = 6.1464e-03, PNorm = 146.7861, GNorm = 0.2178, lr_0 = 3.5261e-04
Loss = 5.6192e-03, PNorm = 146.8030, GNorm = 0.1530, lr_0 = 3.5237e-04
Loss = 5.7696e-03, PNorm = 146.8185, GNorm = 0.2886, lr_0 = 3.5212e-04
Loss = 5.1821e-03, PNorm = 146.8353, GNorm = 0.1447, lr_0 = 3.5188e-04
Loss = 6.6529e-03, PNorm = 146.8486, GNorm = 0.1690, lr_0 = 3.5164e-04
Loss = 5.9612e-03, PNorm = 146.8659, GNorm = 0.2203, lr_0 = 3.5140e-04
Loss = 5.6488e-03, PNorm = 146.8798, GNorm = 0.0810, lr_0 = 3.5116e-04
Loss = 4.9086e-03, PNorm = 146.8919, GNorm = 0.2598, lr_0 = 3.5092e-04
Loss = 5.1950e-03, PNorm = 146.9072, GNorm = 0.0710, lr_0 = 3.5068e-04
Loss = 6.0703e-03, PNorm = 146.9219, GNorm = 0.4117, lr_0 = 3.5044e-04
Loss = 6.1003e-03, PNorm = 146.9349, GNorm = 0.1483, lr_0 = 3.5020e-04
Loss = 6.2193e-03, PNorm = 146.9507, GNorm = 0.3237, lr_0 = 3.4996e-04
Loss = 4.7711e-03, PNorm = 146.9624, GNorm = 0.2711, lr_0 = 3.4972e-04
Loss = 6.1539e-03, PNorm = 146.9805, GNorm = 0.1365, lr_0 = 3.4948e-04
Loss = 6.6639e-03, PNorm = 146.9950, GNorm = 0.0816, lr_0 = 3.4924e-04
Loss = 5.6912e-03, PNorm = 147.0104, GNorm = 0.1862, lr_0 = 3.4900e-04
Loss = 5.1845e-03, PNorm = 147.0222, GNorm = 0.2255, lr_0 = 3.4876e-04
Loss = 6.0958e-03, PNorm = 147.0361, GNorm = 0.2481, lr_0 = 3.4852e-04
Loss = 5.8153e-03, PNorm = 147.0525, GNorm = 0.3811, lr_0 = 3.4828e-04
Loss = 5.1883e-03, PNorm = 147.0647, GNorm = 0.0834, lr_0 = 3.4805e-04
Loss = 6.6846e-03, PNorm = 147.0795, GNorm = 0.3052, lr_0 = 3.4781e-04
Loss = 6.1314e-03, PNorm = 147.0934, GNorm = 0.1118, lr_0 = 3.4757e-04
Loss = 5.3033e-03, PNorm = 147.1085, GNorm = 0.1146, lr_0 = 3.4733e-04
Loss = 7.0407e-03, PNorm = 147.1232, GNorm = 0.5602, lr_0 = 3.4709e-04
Loss = 7.2776e-03, PNorm = 147.1336, GNorm = 0.1777, lr_0 = 3.4686e-04
Loss = 6.1367e-03, PNorm = 147.1445, GNorm = 0.2855, lr_0 = 3.4662e-04
Loss = 7.2937e-03, PNorm = 147.1547, GNorm = 0.3709, lr_0 = 3.4638e-04
Loss = 5.8124e-03, PNorm = 147.1659, GNorm = 0.4655, lr_0 = 3.4614e-04
Loss = 5.5603e-03, PNorm = 147.1775, GNorm = 0.2015, lr_0 = 3.4591e-04
Loss = 6.5688e-03, PNorm = 147.1892, GNorm = 0.2985, lr_0 = 3.4567e-04
Loss = 5.5419e-03, PNorm = 147.2032, GNorm = 0.2463, lr_0 = 3.4543e-04
Loss = 5.3501e-03, PNorm = 147.2208, GNorm = 0.3009, lr_0 = 3.4520e-04
Loss = 7.1663e-03, PNorm = 147.2365, GNorm = 0.2885, lr_0 = 3.4496e-04
Loss = 6.1905e-03, PNorm = 147.2547, GNorm = 0.1988, lr_0 = 3.4472e-04
Loss = 7.1925e-03, PNorm = 147.2699, GNorm = 0.2096, lr_0 = 3.4449e-04
Loss = 6.4366e-03, PNorm = 147.2874, GNorm = 0.1697, lr_0 = 3.4425e-04
Loss = 6.0812e-03, PNorm = 147.3020, GNorm = 0.2150, lr_0 = 3.4402e-04
Loss = 5.6861e-03, PNorm = 147.3167, GNorm = 0.2417, lr_0 = 3.4378e-04
Loss = 5.8790e-03, PNorm = 147.3319, GNorm = 0.2195, lr_0 = 3.4354e-04
Loss = 5.2050e-03, PNorm = 147.3478, GNorm = 0.1478, lr_0 = 3.4331e-04
Validation mae = 0.477950
Epoch 15
Loss = 4.3626e-03, PNorm = 147.3587, GNorm = 0.1479, lr_0 = 3.4307e-04
Loss = 5.9780e-03, PNorm = 147.3703, GNorm = 0.3282, lr_0 = 3.4284e-04
Loss = 5.3041e-03, PNorm = 147.3818, GNorm = 0.1932, lr_0 = 3.4260e-04
Loss = 6.0850e-03, PNorm = 147.3918, GNorm = 0.2691, lr_0 = 3.4237e-04
Loss = 5.1034e-03, PNorm = 147.3994, GNorm = 0.1276, lr_0 = 3.4213e-04
Loss = 5.2725e-03, PNorm = 147.4059, GNorm = 0.2485, lr_0 = 3.4190e-04
Loss = 7.5798e-03, PNorm = 147.4187, GNorm = 0.2486, lr_0 = 3.4167e-04
Loss = 8.6156e-03, PNorm = 147.4288, GNorm = 0.2605, lr_0 = 3.4143e-04
Loss = 5.1171e-03, PNorm = 147.4423, GNorm = 0.1618, lr_0 = 3.4120e-04
Loss = 5.2653e-03, PNorm = 147.4535, GNorm = 0.1127, lr_0 = 3.4096e-04
Loss = 4.7666e-03, PNorm = 147.4648, GNorm = 0.2756, lr_0 = 3.4073e-04
Loss = 5.9361e-03, PNorm = 147.4744, GNorm = 0.1305, lr_0 = 3.4050e-04
Loss = 4.4478e-03, PNorm = 147.4870, GNorm = 0.1141, lr_0 = 3.4026e-04
Loss = 4.6079e-03, PNorm = 147.4982, GNorm = 0.3592, lr_0 = 3.4003e-04
Loss = 5.9082e-03, PNorm = 147.5110, GNorm = 0.2352, lr_0 = 3.3980e-04
Loss = 4.6388e-03, PNorm = 147.5221, GNorm = 0.2977, lr_0 = 3.3956e-04
Loss = 4.8117e-03, PNorm = 147.5299, GNorm = 0.2129, lr_0 = 3.3933e-04
Loss = 4.7683e-03, PNorm = 147.5405, GNorm = 0.4601, lr_0 = 3.3910e-04
Loss = 5.9884e-03, PNorm = 147.5517, GNorm = 0.3504, lr_0 = 3.3887e-04
Loss = 5.2350e-03, PNorm = 147.5601, GNorm = 0.2304, lr_0 = 3.3864e-04
Loss = 4.4264e-03, PNorm = 147.5701, GNorm = 0.4117, lr_0 = 3.3840e-04
Loss = 5.0223e-03, PNorm = 147.5825, GNorm = 0.1415, lr_0 = 3.3817e-04
Loss = 4.5269e-03, PNorm = 147.5926, GNorm = 0.2024, lr_0 = 3.3794e-04
Loss = 4.9198e-03, PNorm = 147.6060, GNorm = 0.1636, lr_0 = 3.3771e-04
Loss = 5.4871e-03, PNorm = 147.6174, GNorm = 0.3292, lr_0 = 3.3748e-04
Loss = 4.3807e-03, PNorm = 147.6289, GNorm = 0.3456, lr_0 = 3.3725e-04
Loss = 4.9507e-03, PNorm = 147.6427, GNorm = 0.1464, lr_0 = 3.3701e-04
Loss = 4.4592e-03, PNorm = 147.6536, GNorm = 0.0815, lr_0 = 3.3678e-04
Loss = 4.1076e-03, PNorm = 147.6612, GNorm = 0.2294, lr_0 = 3.3655e-04
Loss = 5.1008e-03, PNorm = 147.6724, GNorm = 0.1639, lr_0 = 3.3632e-04
Loss = 5.1240e-03, PNorm = 147.6848, GNorm = 0.0763, lr_0 = 3.3609e-04
Loss = 5.6291e-03, PNorm = 147.6930, GNorm = 0.0830, lr_0 = 3.3586e-04
Loss = 8.6420e-03, PNorm = 147.7037, GNorm = 0.2060, lr_0 = 3.3563e-04
Loss = 5.1763e-03, PNorm = 147.7192, GNorm = 0.0752, lr_0 = 3.3540e-04
Loss = 5.3422e-03, PNorm = 147.7290, GNorm = 0.2483, lr_0 = 3.3517e-04
Loss = 5.4583e-03, PNorm = 147.7418, GNorm = 0.1181, lr_0 = 3.3494e-04
Loss = 4.8120e-03, PNorm = 147.7508, GNorm = 0.1545, lr_0 = 3.3471e-04
Loss = 5.2540e-03, PNorm = 147.7647, GNorm = 0.1215, lr_0 = 3.3448e-04
Loss = 6.1034e-03, PNorm = 147.7770, GNorm = 0.1548, lr_0 = 3.3425e-04
Loss = 5.1154e-03, PNorm = 147.7853, GNorm = 0.2142, lr_0 = 3.3403e-04
Loss = 4.2998e-03, PNorm = 147.7951, GNorm = 0.3561, lr_0 = 3.3380e-04
Loss = 4.4261e-03, PNorm = 147.8035, GNorm = 0.1260, lr_0 = 3.3357e-04
Loss = 5.2035e-03, PNorm = 147.8153, GNorm = 0.3701, lr_0 = 3.3334e-04
Loss = 5.2202e-03, PNorm = 147.8298, GNorm = 0.2203, lr_0 = 3.3311e-04
Loss = 7.8016e-03, PNorm = 147.8426, GNorm = 0.2318, lr_0 = 3.3288e-04
Loss = 4.5845e-03, PNorm = 147.8548, GNorm = 0.1546, lr_0 = 3.3265e-04
Loss = 4.2195e-03, PNorm = 147.8655, GNorm = 0.1927, lr_0 = 3.3243e-04
Loss = 4.8143e-03, PNorm = 147.8748, GNorm = 0.1220, lr_0 = 3.3220e-04
Loss = 5.6827e-03, PNorm = 147.8838, GNorm = 0.1661, lr_0 = 3.3197e-04
Loss = 6.0416e-03, PNorm = 147.8885, GNorm = 0.2786, lr_0 = 3.3174e-04
Loss = 5.4474e-03, PNorm = 147.8978, GNorm = 0.0744, lr_0 = 3.3152e-04
Loss = 5.2424e-03, PNorm = 147.9128, GNorm = 0.1053, lr_0 = 3.3129e-04
Loss = 4.7304e-03, PNorm = 147.9279, GNorm = 0.3183, lr_0 = 3.3106e-04
Loss = 5.3061e-03, PNorm = 147.9376, GNorm = 0.1160, lr_0 = 3.3084e-04
Loss = 6.1193e-03, PNorm = 147.9482, GNorm = 0.2340, lr_0 = 3.3061e-04
Loss = 4.1274e-03, PNorm = 147.9576, GNorm = 0.1730, lr_0 = 3.3038e-04
Loss = 6.5969e-03, PNorm = 147.9673, GNorm = 0.4645, lr_0 = 3.3016e-04
Loss = 6.0179e-03, PNorm = 147.9780, GNorm = 0.3147, lr_0 = 3.2993e-04
Loss = 4.7288e-03, PNorm = 147.9911, GNorm = 0.3051, lr_0 = 3.2970e-04
Loss = 5.6222e-03, PNorm = 148.0009, GNorm = 0.3654, lr_0 = 3.2948e-04
Loss = 5.8615e-03, PNorm = 148.0138, GNorm = 0.1797, lr_0 = 3.2925e-04
Loss = 5.1415e-03, PNorm = 148.0268, GNorm = 0.2519, lr_0 = 3.2903e-04
Loss = 4.7166e-03, PNorm = 148.0398, GNorm = 0.2392, lr_0 = 3.2880e-04
Loss = 5.1504e-03, PNorm = 148.0534, GNorm = 0.1246, lr_0 = 3.2858e-04
Loss = 4.9763e-03, PNorm = 148.0652, GNorm = 0.1106, lr_0 = 3.2835e-04
Loss = 4.7827e-03, PNorm = 148.0738, GNorm = 0.2740, lr_0 = 3.2813e-04
Loss = 4.5328e-03, PNorm = 148.0844, GNorm = 0.0856, lr_0 = 3.2790e-04
Loss = 5.5369e-03, PNorm = 148.0972, GNorm = 0.2838, lr_0 = 3.2768e-04
Loss = 4.7999e-03, PNorm = 148.1081, GNorm = 0.2096, lr_0 = 3.2745e-04
Loss = 8.8084e-03, PNorm = 148.1221, GNorm = 0.1412, lr_0 = 3.2723e-04
Loss = 3.9889e-03, PNorm = 148.1320, GNorm = 0.0792, lr_0 = 3.2700e-04
Loss = 4.2708e-03, PNorm = 148.1419, GNorm = 0.1655, lr_0 = 3.2678e-04
Loss = 4.5065e-03, PNorm = 148.1555, GNorm = 0.2145, lr_0 = 3.2656e-04
Loss = 4.1851e-03, PNorm = 148.1680, GNorm = 0.0918, lr_0 = 3.2633e-04
Loss = 5.5343e-03, PNorm = 148.1802, GNorm = 0.1972, lr_0 = 3.2611e-04
Loss = 4.3432e-03, PNorm = 148.1938, GNorm = 0.1699, lr_0 = 3.2589e-04
Loss = 7.8398e-03, PNorm = 148.2051, GNorm = 0.2055, lr_0 = 3.2566e-04
Loss = 6.7753e-03, PNorm = 148.2145, GNorm = 0.1917, lr_0 = 3.2544e-04
Loss = 7.1385e-03, PNorm = 148.2211, GNorm = 0.2106, lr_0 = 3.2522e-04
Loss = 5.0855e-03, PNorm = 148.2286, GNorm = 0.1331, lr_0 = 3.2499e-04
Loss = 4.5236e-03, PNorm = 148.2386, GNorm = 0.2850, lr_0 = 3.2477e-04
Loss = 4.3716e-03, PNorm = 148.2514, GNorm = 0.3451, lr_0 = 3.2455e-04
Loss = 4.3568e-03, PNorm = 148.2645, GNorm = 0.1306, lr_0 = 3.2433e-04
Loss = 4.3046e-03, PNorm = 148.2720, GNorm = 0.3610, lr_0 = 3.2410e-04
Loss = 4.5749e-03, PNorm = 148.2809, GNorm = 0.1993, lr_0 = 3.2388e-04
Loss = 5.9541e-03, PNorm = 148.2913, GNorm = 0.2095, lr_0 = 3.2366e-04
Loss = 8.1497e-03, PNorm = 148.3040, GNorm = 0.2470, lr_0 = 3.2344e-04
Loss = 4.8584e-03, PNorm = 148.3156, GNorm = 0.2556, lr_0 = 3.2322e-04
Loss = 4.4354e-03, PNorm = 148.3299, GNorm = 0.1991, lr_0 = 3.2300e-04
Loss = 4.4779e-03, PNorm = 148.3466, GNorm = 0.3223, lr_0 = 3.2277e-04
Loss = 5.2300e-03, PNorm = 148.3597, GNorm = 0.2654, lr_0 = 3.2255e-04
Loss = 4.5832e-03, PNorm = 148.3680, GNorm = 0.4484, lr_0 = 3.2233e-04
Loss = 5.1520e-03, PNorm = 148.3750, GNorm = 0.1279, lr_0 = 3.2211e-04
Loss = 5.9171e-03, PNorm = 148.3855, GNorm = 0.1950, lr_0 = 3.2189e-04
Loss = 6.2061e-03, PNorm = 148.3979, GNorm = 0.1454, lr_0 = 3.2167e-04
Loss = 4.4230e-03, PNorm = 148.4113, GNorm = 0.1937, lr_0 = 3.2145e-04
Loss = 4.0869e-03, PNorm = 148.4229, GNorm = 0.1923, lr_0 = 3.2123e-04
Loss = 5.3061e-03, PNorm = 148.4369, GNorm = 0.4349, lr_0 = 3.2101e-04
Loss = 4.4294e-03, PNorm = 148.4519, GNorm = 0.1018, lr_0 = 3.2079e-04
Loss = 4.8202e-03, PNorm = 148.4664, GNorm = 0.2492, lr_0 = 3.2057e-04
Loss = 4.7834e-03, PNorm = 148.4767, GNorm = 0.4754, lr_0 = 3.2035e-04
Loss = 6.2632e-03, PNorm = 148.4869, GNorm = 0.5196, lr_0 = 3.2013e-04
Loss = 4.7523e-03, PNorm = 148.4990, GNorm = 0.1783, lr_0 = 3.1991e-04
Loss = 4.7454e-03, PNorm = 148.5111, GNorm = 0.2025, lr_0 = 3.1969e-04
Loss = 5.8188e-03, PNorm = 148.5224, GNorm = 0.2664, lr_0 = 3.1947e-04
Loss = 4.8193e-03, PNorm = 148.5340, GNorm = 0.2278, lr_0 = 3.1925e-04
Loss = 4.9504e-03, PNorm = 148.5465, GNorm = 0.1346, lr_0 = 3.1904e-04
Loss = 7.3268e-03, PNorm = 148.5604, GNorm = 0.0661, lr_0 = 3.1882e-04
Loss = 4.6308e-03, PNorm = 148.5740, GNorm = 0.1417, lr_0 = 3.1860e-04
Loss = 6.7714e-03, PNorm = 148.5834, GNorm = 0.1859, lr_0 = 3.1838e-04
Loss = 6.3271e-03, PNorm = 148.5976, GNorm = 0.3859, lr_0 = 3.1816e-04
Loss = 4.5155e-03, PNorm = 148.6106, GNorm = 0.3199, lr_0 = 3.1794e-04
Loss = 5.8001e-03, PNorm = 148.6207, GNorm = 0.3164, lr_0 = 3.1773e-04
Loss = 4.2849e-03, PNorm = 148.6289, GNorm = 0.2733, lr_0 = 3.1751e-04
Loss = 7.1011e-03, PNorm = 148.6367, GNorm = 0.2442, lr_0 = 3.1729e-04
Loss = 4.7107e-03, PNorm = 148.6482, GNorm = 0.2414, lr_0 = 3.1707e-04
Loss = 5.1940e-03, PNorm = 148.6643, GNorm = 0.2135, lr_0 = 3.1686e-04
Loss = 4.9337e-03, PNorm = 148.6767, GNorm = 0.1178, lr_0 = 3.1664e-04
Loss = 4.8941e-03, PNorm = 148.6918, GNorm = 0.1898, lr_0 = 3.1642e-04
Loss = 7.2948e-03, PNorm = 148.7031, GNorm = 0.0751, lr_0 = 3.1621e-04
Validation mae = 0.478522
Epoch 16
Loss = 4.3614e-03, PNorm = 148.7104, GNorm = 0.1035, lr_0 = 3.1599e-04
Loss = 4.0623e-03, PNorm = 148.7169, GNorm = 0.0708, lr_0 = 3.1577e-04
Loss = 4.3378e-03, PNorm = 148.7269, GNorm = 0.0774, lr_0 = 3.1556e-04
Loss = 3.9380e-03, PNorm = 148.7366, GNorm = 0.1502, lr_0 = 3.1534e-04
Loss = 4.5310e-03, PNorm = 148.7392, GNorm = 0.2178, lr_0 = 3.1512e-04
Loss = 3.7880e-03, PNorm = 148.7466, GNorm = 0.0572, lr_0 = 3.1491e-04
Loss = 3.9145e-03, PNorm = 148.7542, GNorm = 0.0747, lr_0 = 3.1469e-04
Loss = 6.0922e-03, PNorm = 148.7619, GNorm = 0.3484, lr_0 = 3.1448e-04
Loss = 4.5876e-03, PNorm = 148.7699, GNorm = 0.1624, lr_0 = 3.1426e-04
Loss = 4.0584e-03, PNorm = 148.7758, GNorm = 0.1677, lr_0 = 3.1405e-04
Loss = 4.3317e-03, PNorm = 148.7855, GNorm = 0.1900, lr_0 = 3.1383e-04
Loss = 4.8705e-03, PNorm = 148.7951, GNorm = 0.2867, lr_0 = 3.1362e-04
Loss = 4.3971e-03, PNorm = 148.8028, GNorm = 0.2366, lr_0 = 3.1340e-04
Loss = 4.2086e-03, PNorm = 148.8101, GNorm = 0.1089, lr_0 = 3.1319e-04
Loss = 5.7537e-03, PNorm = 148.8180, GNorm = 0.2033, lr_0 = 3.1297e-04
Loss = 3.9196e-03, PNorm = 148.8262, GNorm = 0.1622, lr_0 = 3.1276e-04
Loss = 6.1147e-03, PNorm = 148.8353, GNorm = 0.1093, lr_0 = 3.1254e-04
Loss = 4.5487e-03, PNorm = 148.8433, GNorm = 0.0786, lr_0 = 3.1233e-04
Loss = 3.9171e-03, PNorm = 148.8531, GNorm = 0.1044, lr_0 = 3.1212e-04
Loss = 4.4866e-03, PNorm = 148.8631, GNorm = 0.1592, lr_0 = 3.1190e-04
Loss = 3.7376e-03, PNorm = 148.8739, GNorm = 0.2258, lr_0 = 3.1169e-04
Loss = 3.7522e-03, PNorm = 148.8824, GNorm = 0.2427, lr_0 = 3.1147e-04
Loss = 4.5022e-03, PNorm = 148.8918, GNorm = 0.2502, lr_0 = 3.1126e-04
Loss = 4.3147e-03, PNorm = 148.8994, GNorm = 0.1121, lr_0 = 3.1105e-04
Loss = 5.4412e-03, PNorm = 148.9105, GNorm = 0.3680, lr_0 = 3.1083e-04
Loss = 7.6159e-03, PNorm = 148.9215, GNorm = 0.0981, lr_0 = 3.1062e-04
Loss = 4.6810e-03, PNorm = 148.9294, GNorm = 0.4034, lr_0 = 3.1041e-04
Loss = 5.3044e-03, PNorm = 148.9386, GNorm = 0.3956, lr_0 = 3.1020e-04
Loss = 4.6025e-03, PNorm = 148.9490, GNorm = 0.2378, lr_0 = 3.0998e-04
Loss = 4.5574e-03, PNorm = 148.9594, GNorm = 0.4375, lr_0 = 3.0977e-04
Loss = 4.3059e-03, PNorm = 148.9716, GNorm = 0.1966, lr_0 = 3.0956e-04
Loss = 3.9115e-03, PNorm = 148.9820, GNorm = 0.3058, lr_0 = 3.0935e-04
Loss = 5.0584e-03, PNorm = 148.9920, GNorm = 0.1774, lr_0 = 3.0914e-04
Loss = 4.3238e-03, PNorm = 148.9981, GNorm = 0.3192, lr_0 = 3.0892e-04
Loss = 4.4455e-03, PNorm = 149.0067, GNorm = 0.1811, lr_0 = 3.0871e-04
Loss = 3.4975e-03, PNorm = 149.0169, GNorm = 0.1785, lr_0 = 3.0850e-04
Loss = 3.9124e-03, PNorm = 149.0282, GNorm = 0.0845, lr_0 = 3.0829e-04
Loss = 3.7255e-03, PNorm = 149.0375, GNorm = 0.1452, lr_0 = 3.0808e-04
Loss = 4.2432e-03, PNorm = 149.0470, GNorm = 0.2111, lr_0 = 3.0787e-04
Loss = 4.2868e-03, PNorm = 149.0551, GNorm = 0.1670, lr_0 = 3.0766e-04
Loss = 4.3986e-03, PNorm = 149.0639, GNorm = 0.2803, lr_0 = 3.0745e-04
Loss = 3.3770e-03, PNorm = 149.0708, GNorm = 0.1698, lr_0 = 3.0723e-04
Loss = 3.7192e-03, PNorm = 149.0804, GNorm = 0.1396, lr_0 = 3.0702e-04
Loss = 4.2634e-03, PNorm = 149.0894, GNorm = 0.2708, lr_0 = 3.0681e-04
Loss = 6.3851e-03, PNorm = 149.0988, GNorm = 0.5141, lr_0 = 3.0660e-04
Loss = 4.2793e-03, PNorm = 149.1089, GNorm = 0.2075, lr_0 = 3.0639e-04
Loss = 5.4806e-03, PNorm = 149.1193, GNorm = 0.2650, lr_0 = 3.0618e-04
Loss = 5.4629e-03, PNorm = 149.1294, GNorm = 0.2725, lr_0 = 3.0597e-04
Loss = 4.7346e-03, PNorm = 149.1418, GNorm = 0.1973, lr_0 = 3.0576e-04
Loss = 3.5551e-03, PNorm = 149.1498, GNorm = 0.2363, lr_0 = 3.0555e-04
Loss = 3.6603e-03, PNorm = 149.1576, GNorm = 0.1074, lr_0 = 3.0535e-04
Loss = 4.2488e-03, PNorm = 149.1666, GNorm = 0.0713, lr_0 = 3.0514e-04
Loss = 3.8622e-03, PNorm = 149.1741, GNorm = 0.1499, lr_0 = 3.0493e-04
Loss = 4.3084e-03, PNorm = 149.1852, GNorm = 0.1755, lr_0 = 3.0472e-04
Loss = 3.6696e-03, PNorm = 149.1919, GNorm = 0.2126, lr_0 = 3.0451e-04
Loss = 6.3472e-03, PNorm = 149.1997, GNorm = 0.2099, lr_0 = 3.0430e-04
Loss = 4.7924e-03, PNorm = 149.2089, GNorm = 0.1548, lr_0 = 3.0409e-04
Loss = 3.7849e-03, PNorm = 149.2199, GNorm = 0.1239, lr_0 = 3.0388e-04
Loss = 4.3613e-03, PNorm = 149.2298, GNorm = 0.4146, lr_0 = 3.0368e-04
Loss = 3.9373e-03, PNorm = 149.2393, GNorm = 0.1673, lr_0 = 3.0347e-04
Loss = 5.3965e-03, PNorm = 149.2476, GNorm = 0.2771, lr_0 = 3.0326e-04
Loss = 3.8091e-03, PNorm = 149.2596, GNorm = 0.1496, lr_0 = 3.0305e-04
Loss = 4.2715e-03, PNorm = 149.2683, GNorm = 0.1978, lr_0 = 3.0284e-04
Loss = 3.8090e-03, PNorm = 149.2769, GNorm = 0.1259, lr_0 = 3.0264e-04
Loss = 7.8181e-03, PNorm = 149.2878, GNorm = 0.2377, lr_0 = 3.0243e-04
Loss = 5.4936e-03, PNorm = 149.3015, GNorm = 0.1137, lr_0 = 3.0222e-04
Loss = 5.1199e-03, PNorm = 149.3120, GNorm = 0.1823, lr_0 = 3.0202e-04
Loss = 5.7397e-03, PNorm = 149.3196, GNorm = 0.1572, lr_0 = 3.0181e-04
Loss = 8.7317e-03, PNorm = 149.3304, GNorm = 0.4042, lr_0 = 3.0160e-04
Loss = 4.1096e-03, PNorm = 149.3440, GNorm = 0.2293, lr_0 = 3.0140e-04
Loss = 8.6884e-03, PNorm = 149.3542, GNorm = 0.1365, lr_0 = 3.0119e-04
Loss = 6.6910e-03, PNorm = 149.3665, GNorm = 0.1541, lr_0 = 3.0098e-04
Loss = 6.7273e-03, PNorm = 149.3754, GNorm = 0.4502, lr_0 = 3.0078e-04
Loss = 4.7262e-03, PNorm = 149.3871, GNorm = 0.2915, lr_0 = 3.0057e-04
Loss = 3.6760e-03, PNorm = 149.3999, GNorm = 0.1108, lr_0 = 3.0036e-04
Loss = 3.3195e-03, PNorm = 149.4108, GNorm = 0.2487, lr_0 = 3.0016e-04
Loss = 3.8598e-03, PNorm = 149.4193, GNorm = 0.1551, lr_0 = 2.9995e-04
Loss = 4.3916e-03, PNorm = 149.4300, GNorm = 0.2549, lr_0 = 2.9975e-04
Loss = 4.1780e-03, PNorm = 149.4426, GNorm = 0.1253, lr_0 = 2.9954e-04
Loss = 4.2234e-03, PNorm = 149.4526, GNorm = 0.1567, lr_0 = 2.9934e-04
Loss = 3.7792e-03, PNorm = 149.4639, GNorm = 0.3443, lr_0 = 2.9913e-04
Loss = 4.9466e-03, PNorm = 149.4724, GNorm = 0.1913, lr_0 = 2.9893e-04
Loss = 3.9107e-03, PNorm = 149.4841, GNorm = 0.1426, lr_0 = 2.9872e-04
Loss = 5.0672e-03, PNorm = 149.4936, GNorm = 0.2094, lr_0 = 2.9852e-04
Loss = 3.8868e-03, PNorm = 149.5026, GNorm = 0.1744, lr_0 = 2.9831e-04
Loss = 4.8660e-03, PNorm = 149.5113, GNorm = 0.2768, lr_0 = 2.9811e-04
Loss = 4.1883e-03, PNorm = 149.5243, GNorm = 0.1572, lr_0 = 2.9790e-04
Loss = 6.2574e-03, PNorm = 149.5366, GNorm = 0.3975, lr_0 = 2.9770e-04
Loss = 6.3810e-03, PNorm = 149.5445, GNorm = 0.1237, lr_0 = 2.9750e-04
Loss = 4.1657e-03, PNorm = 149.5550, GNorm = 0.1394, lr_0 = 2.9729e-04
Loss = 4.7942e-03, PNorm = 149.5642, GNorm = 0.1420, lr_0 = 2.9709e-04
Loss = 5.4517e-03, PNorm = 149.5788, GNorm = 0.0897, lr_0 = 2.9689e-04
Loss = 4.8138e-03, PNorm = 149.5922, GNorm = 0.0649, lr_0 = 2.9668e-04
Loss = 5.0991e-03, PNorm = 149.6054, GNorm = 0.0759, lr_0 = 2.9648e-04
Loss = 3.4245e-03, PNorm = 149.6116, GNorm = 0.1827, lr_0 = 2.9628e-04
Loss = 4.7703e-03, PNorm = 149.6198, GNorm = 0.2119, lr_0 = 2.9607e-04
Loss = 3.9135e-03, PNorm = 149.6305, GNorm = 0.0934, lr_0 = 2.9587e-04
Loss = 4.4014e-03, PNorm = 149.6402, GNorm = 0.2318, lr_0 = 2.9567e-04
Loss = 3.4684e-03, PNorm = 149.6499, GNorm = 0.1028, lr_0 = 2.9546e-04
Loss = 3.9241e-03, PNorm = 149.6586, GNorm = 0.2984, lr_0 = 2.9526e-04
Loss = 5.1151e-03, PNorm = 149.6717, GNorm = 0.3171, lr_0 = 2.9506e-04
Loss = 4.4033e-03, PNorm = 149.6796, GNorm = 0.1323, lr_0 = 2.9486e-04
Loss = 5.3377e-03, PNorm = 149.6901, GNorm = 0.3254, lr_0 = 2.9466e-04
Loss = 5.4419e-03, PNorm = 149.7040, GNorm = 0.1786, lr_0 = 2.9445e-04
Loss = 3.4668e-03, PNorm = 149.7133, GNorm = 0.2481, lr_0 = 2.9425e-04
Loss = 6.7248e-03, PNorm = 149.7204, GNorm = 0.3092, lr_0 = 2.9405e-04
Loss = 4.9073e-03, PNorm = 149.7247, GNorm = 0.1302, lr_0 = 2.9385e-04
Loss = 3.3263e-03, PNorm = 149.7336, GNorm = 0.2516, lr_0 = 2.9365e-04
Loss = 6.1485e-03, PNorm = 149.7458, GNorm = 0.2301, lr_0 = 2.9345e-04
Loss = 4.5779e-03, PNorm = 149.7531, GNorm = 0.2675, lr_0 = 2.9325e-04
Loss = 4.2310e-03, PNorm = 149.7641, GNorm = 0.2143, lr_0 = 2.9305e-04
Loss = 4.2149e-03, PNorm = 149.7751, GNorm = 0.0659, lr_0 = 2.9284e-04
Loss = 4.8928e-03, PNorm = 149.7869, GNorm = 0.0680, lr_0 = 2.9264e-04
Loss = 4.3452e-03, PNorm = 149.7999, GNorm = 0.0956, lr_0 = 2.9244e-04
Loss = 4.4854e-03, PNorm = 149.8142, GNorm = 0.1775, lr_0 = 2.9224e-04
Loss = 4.3238e-03, PNorm = 149.8259, GNorm = 0.2424, lr_0 = 2.9204e-04
Loss = 3.5859e-03, PNorm = 149.8328, GNorm = 0.2151, lr_0 = 2.9184e-04
Loss = 4.3648e-03, PNorm = 149.8414, GNorm = 0.2088, lr_0 = 2.9164e-04
Loss = 4.2287e-03, PNorm = 149.8522, GNorm = 0.1284, lr_0 = 2.9144e-04
Loss = 4.0283e-03, PNorm = 149.8669, GNorm = 0.1437, lr_0 = 2.9124e-04
Validation mae = 0.476411
Epoch 17
Loss = 3.7648e-03, PNorm = 149.8750, GNorm = 0.1374, lr_0 = 2.9104e-04
Loss = 4.1372e-03, PNorm = 149.8815, GNorm = 0.4652, lr_0 = 2.9084e-04
Loss = 4.4335e-03, PNorm = 149.8874, GNorm = 0.1738, lr_0 = 2.9065e-04
Loss = 5.2153e-03, PNorm = 149.8927, GNorm = 0.5842, lr_0 = 2.9045e-04
Loss = 3.3137e-03, PNorm = 149.9006, GNorm = 0.1450, lr_0 = 2.9025e-04
Loss = 4.1037e-03, PNorm = 149.9120, GNorm = 0.2586, lr_0 = 2.9005e-04
Loss = 3.9322e-03, PNorm = 149.9200, GNorm = 0.0931, lr_0 = 2.8985e-04
Loss = 4.2254e-03, PNorm = 149.9240, GNorm = 0.1671, lr_0 = 2.8965e-04
Loss = 3.5773e-03, PNorm = 149.9311, GNorm = 0.1210, lr_0 = 2.8945e-04
Loss = 5.3866e-03, PNorm = 149.9370, GNorm = 0.0939, lr_0 = 2.8925e-04
Loss = 3.3924e-03, PNorm = 149.9454, GNorm = 0.0577, lr_0 = 2.8906e-04
Loss = 4.4244e-03, PNorm = 149.9554, GNorm = 0.0832, lr_0 = 2.8886e-04
Loss = 3.4754e-03, PNorm = 149.9654, GNorm = 0.0661, lr_0 = 2.8866e-04
Loss = 3.7091e-03, PNorm = 149.9751, GNorm = 0.1523, lr_0 = 2.8846e-04
Loss = 2.8669e-03, PNorm = 149.9839, GNorm = 0.1542, lr_0 = 2.8826e-04
Loss = 3.8900e-03, PNorm = 149.9911, GNorm = 0.1090, lr_0 = 2.8807e-04
Loss = 3.9320e-03, PNorm = 149.9969, GNorm = 0.0685, lr_0 = 2.8787e-04
Loss = 3.2684e-03, PNorm = 150.0032, GNorm = 0.1735, lr_0 = 2.8767e-04
Loss = 2.8721e-03, PNorm = 150.0117, GNorm = 0.2732, lr_0 = 2.8748e-04
Loss = 3.7096e-03, PNorm = 150.0202, GNorm = 0.1236, lr_0 = 2.8728e-04
Loss = 5.9591e-03, PNorm = 150.0253, GNorm = 0.0875, lr_0 = 2.8708e-04
Loss = 3.4051e-03, PNorm = 150.0338, GNorm = 0.2053, lr_0 = 2.8689e-04
Loss = 2.5789e-03, PNorm = 150.0444, GNorm = 0.1939, lr_0 = 2.8669e-04
Loss = 2.8172e-03, PNorm = 150.0534, GNorm = 0.0987, lr_0 = 2.8649e-04
Loss = 4.6943e-03, PNorm = 150.0601, GNorm = 0.2201, lr_0 = 2.8630e-04
Loss = 3.8332e-03, PNorm = 150.0676, GNorm = 0.2978, lr_0 = 2.8610e-04
Loss = 4.1888e-03, PNorm = 150.0766, GNorm = 0.3068, lr_0 = 2.8590e-04
Loss = 3.2356e-03, PNorm = 150.0840, GNorm = 0.2047, lr_0 = 2.8571e-04
Loss = 3.3856e-03, PNorm = 150.0895, GNorm = 0.1846, lr_0 = 2.8551e-04
Loss = 6.7288e-03, PNorm = 150.0965, GNorm = 0.0913, lr_0 = 2.8532e-04
Loss = 3.5574e-03, PNorm = 150.1073, GNorm = 0.1459, lr_0 = 2.8512e-04
Loss = 3.2003e-03, PNorm = 150.1143, GNorm = 0.1277, lr_0 = 2.8493e-04
Loss = 4.0333e-03, PNorm = 150.1226, GNorm = 0.2228, lr_0 = 2.8473e-04
Loss = 3.8106e-03, PNorm = 150.1360, GNorm = 0.1772, lr_0 = 2.8454e-04
Loss = 3.1941e-03, PNorm = 150.1431, GNorm = 0.2008, lr_0 = 2.8434e-04
Loss = 7.5679e-03, PNorm = 150.1499, GNorm = 0.2940, lr_0 = 2.8415e-04
Loss = 3.6097e-03, PNorm = 150.1558, GNorm = 0.1474, lr_0 = 2.8395e-04
Loss = 3.2125e-03, PNorm = 150.1635, GNorm = 0.2161, lr_0 = 2.8376e-04
Loss = 3.8210e-03, PNorm = 150.1720, GNorm = 0.0456, lr_0 = 2.8356e-04
Loss = 3.6721e-03, PNorm = 150.1794, GNorm = 0.2789, lr_0 = 2.8337e-04
Loss = 3.9044e-03, PNorm = 150.1862, GNorm = 0.1425, lr_0 = 2.8317e-04
Loss = 3.1130e-03, PNorm = 150.1949, GNorm = 0.1251, lr_0 = 2.8298e-04
Loss = 3.4976e-03, PNorm = 150.2032, GNorm = 0.1002, lr_0 = 2.8279e-04
Loss = 3.4689e-03, PNorm = 150.2100, GNorm = 0.2889, lr_0 = 2.8259e-04
Loss = 4.0004e-03, PNorm = 150.2149, GNorm = 0.1827, lr_0 = 2.8240e-04
Loss = 3.3311e-03, PNorm = 150.2235, GNorm = 0.1353, lr_0 = 2.8221e-04
Loss = 3.2969e-03, PNorm = 150.2298, GNorm = 0.1007, lr_0 = 2.8201e-04
Loss = 5.7364e-03, PNorm = 150.2348, GNorm = 0.1334, lr_0 = 2.8182e-04
Loss = 5.0778e-03, PNorm = 150.2432, GNorm = 0.1077, lr_0 = 2.8163e-04
Loss = 3.2949e-03, PNorm = 150.2524, GNorm = 0.1413, lr_0 = 2.8143e-04
Loss = 4.2359e-03, PNorm = 150.2616, GNorm = 0.2083, lr_0 = 2.8124e-04
Loss = 3.4638e-03, PNorm = 150.2661, GNorm = 0.1312, lr_0 = 2.8105e-04
Loss = 3.0806e-03, PNorm = 150.2727, GNorm = 0.1335, lr_0 = 2.8085e-04
Loss = 4.2845e-03, PNorm = 150.2803, GNorm = 0.0742, lr_0 = 2.8066e-04
Loss = 3.1949e-03, PNorm = 150.2889, GNorm = 0.0767, lr_0 = 2.8047e-04
Loss = 6.5357e-03, PNorm = 150.2980, GNorm = 0.0956, lr_0 = 2.8028e-04
Loss = 5.1044e-03, PNorm = 150.3065, GNorm = 0.4689, lr_0 = 2.8009e-04
Loss = 5.1405e-03, PNorm = 150.3157, GNorm = 0.1246, lr_0 = 2.7989e-04
Loss = 4.0426e-03, PNorm = 150.3226, GNorm = 0.2951, lr_0 = 2.7970e-04
Loss = 4.3769e-03, PNorm = 150.3293, GNorm = 0.1965, lr_0 = 2.7951e-04
Loss = 3.9457e-03, PNorm = 150.3376, GNorm = 0.3034, lr_0 = 2.7932e-04
Loss = 4.0237e-03, PNorm = 150.3437, GNorm = 0.2692, lr_0 = 2.7913e-04
Loss = 4.2728e-03, PNorm = 150.3536, GNorm = 0.3562, lr_0 = 2.7894e-04
Loss = 3.2449e-03, PNorm = 150.3614, GNorm = 0.1997, lr_0 = 2.7875e-04
Loss = 4.2489e-03, PNorm = 150.3696, GNorm = 0.3145, lr_0 = 2.7855e-04
Loss = 4.3322e-03, PNorm = 150.3791, GNorm = 0.0711, lr_0 = 2.7836e-04
Loss = 3.6091e-03, PNorm = 150.3908, GNorm = 0.1715, lr_0 = 2.7817e-04
Loss = 4.3156e-03, PNorm = 150.3996, GNorm = 0.0765, lr_0 = 2.7798e-04
Loss = 4.2474e-03, PNorm = 150.4087, GNorm = 0.2557, lr_0 = 2.7779e-04
Loss = 3.4579e-03, PNorm = 150.4170, GNorm = 0.0837, lr_0 = 2.7760e-04
Loss = 3.0604e-03, PNorm = 150.4234, GNorm = 0.1615, lr_0 = 2.7741e-04
Loss = 2.7337e-03, PNorm = 150.4316, GNorm = 0.1265, lr_0 = 2.7722e-04
Loss = 3.0920e-03, PNorm = 150.4390, GNorm = 0.0689, lr_0 = 2.7703e-04
Loss = 3.5329e-03, PNorm = 150.4434, GNorm = 0.1082, lr_0 = 2.7684e-04
Loss = 3.3983e-03, PNorm = 150.4502, GNorm = 0.2182, lr_0 = 2.7665e-04
Loss = 4.3381e-03, PNorm = 150.4542, GNorm = 0.1010, lr_0 = 2.7646e-04
Loss = 4.2857e-03, PNorm = 150.4619, GNorm = 0.1065, lr_0 = 2.7627e-04
Loss = 2.9346e-03, PNorm = 150.4679, GNorm = 0.1060, lr_0 = 2.7608e-04
Loss = 3.2150e-03, PNorm = 150.4752, GNorm = 0.1290, lr_0 = 2.7590e-04
Loss = 4.5817e-03, PNorm = 150.4836, GNorm = 0.2400, lr_0 = 2.7571e-04
Loss = 5.1118e-03, PNorm = 150.4941, GNorm = 0.1453, lr_0 = 2.7552e-04
Loss = 3.9159e-03, PNorm = 150.5051, GNorm = 0.1604, lr_0 = 2.7533e-04
Loss = 3.1673e-03, PNorm = 150.5148, GNorm = 0.1677, lr_0 = 2.7514e-04
Loss = 4.2746e-03, PNorm = 150.5257, GNorm = 0.1208, lr_0 = 2.7495e-04
Loss = 3.2763e-03, PNorm = 150.5396, GNorm = 0.0757, lr_0 = 2.7476e-04
Loss = 3.5379e-03, PNorm = 150.5475, GNorm = 0.2950, lr_0 = 2.7457e-04
Loss = 3.4954e-03, PNorm = 150.5563, GNorm = 0.2065, lr_0 = 2.7439e-04
Loss = 3.2570e-03, PNorm = 150.5645, GNorm = 0.2042, lr_0 = 2.7420e-04
Loss = 3.4090e-03, PNorm = 150.5728, GNorm = 0.2852, lr_0 = 2.7401e-04
Loss = 4.1664e-03, PNorm = 150.5821, GNorm = 0.1430, lr_0 = 2.7382e-04
Loss = 3.1528e-03, PNorm = 150.5884, GNorm = 0.1752, lr_0 = 2.7364e-04
Loss = 3.2149e-03, PNorm = 150.5977, GNorm = 0.3154, lr_0 = 2.7345e-04
Loss = 3.5060e-03, PNorm = 150.6065, GNorm = 0.1951, lr_0 = 2.7326e-04
Loss = 5.4235e-03, PNorm = 150.6148, GNorm = 0.2182, lr_0 = 2.7307e-04
Loss = 5.0021e-03, PNorm = 150.6244, GNorm = 0.1108, lr_0 = 2.7289e-04
Loss = 4.7071e-03, PNorm = 150.6320, GNorm = 0.0827, lr_0 = 2.7270e-04
Loss = 3.4711e-03, PNorm = 150.6385, GNorm = 0.1893, lr_0 = 2.7251e-04
Loss = 5.9423e-03, PNorm = 150.6505, GNorm = 0.2015, lr_0 = 2.7233e-04
Loss = 4.4039e-03, PNorm = 150.6659, GNorm = 0.0927, lr_0 = 2.7214e-04
Loss = 3.6314e-03, PNorm = 150.6753, GNorm = 0.1766, lr_0 = 2.7195e-04
Loss = 6.2746e-03, PNorm = 150.6798, GNorm = 0.0860, lr_0 = 2.7177e-04
Loss = 2.9058e-03, PNorm = 150.6845, GNorm = 0.1634, lr_0 = 2.7158e-04
Loss = 3.6006e-03, PNorm = 150.6939, GNorm = 0.2344, lr_0 = 2.7139e-04
Loss = 4.9756e-03, PNorm = 150.7037, GNorm = 0.2014, lr_0 = 2.7121e-04
Loss = 3.2843e-03, PNorm = 150.7107, GNorm = 0.1686, lr_0 = 2.7102e-04
Loss = 5.5791e-03, PNorm = 150.7158, GNorm = 0.0944, lr_0 = 2.7084e-04
Loss = 3.1839e-03, PNorm = 150.7227, GNorm = 0.2723, lr_0 = 2.7065e-04
Loss = 3.4575e-03, PNorm = 150.7339, GNorm = 0.0746, lr_0 = 2.7047e-04
Loss = 3.4167e-03, PNorm = 150.7430, GNorm = 0.4094, lr_0 = 2.7028e-04
Loss = 4.7261e-03, PNorm = 150.7523, GNorm = 0.0784, lr_0 = 2.7010e-04
Loss = 4.4038e-03, PNorm = 150.7603, GNorm = 0.4027, lr_0 = 2.6991e-04
Loss = 3.5040e-03, PNorm = 150.7697, GNorm = 0.1677, lr_0 = 2.6973e-04
Loss = 4.8655e-03, PNorm = 150.7806, GNorm = 0.1849, lr_0 = 2.6954e-04
Loss = 3.2850e-03, PNorm = 150.7916, GNorm = 0.1217, lr_0 = 2.6936e-04
Loss = 3.8837e-03, PNorm = 150.8001, GNorm = 0.1748, lr_0 = 2.6917e-04
Loss = 3.2477e-03, PNorm = 150.8038, GNorm = 0.2766, lr_0 = 2.6899e-04
Loss = 3.9494e-03, PNorm = 150.8111, GNorm = 0.2585, lr_0 = 2.6880e-04
Loss = 4.7145e-03, PNorm = 150.8196, GNorm = 0.0564, lr_0 = 2.6862e-04
Loss = 4.6040e-03, PNorm = 150.8279, GNorm = 0.1365, lr_0 = 2.6844e-04
Loss = 5.6174e-03, PNorm = 150.8370, GNorm = 0.1163, lr_0 = 2.6825e-04
Validation mae = 0.475202
Epoch 18
Loss = 4.6985e-03, PNorm = 150.8415, GNorm = 0.2959, lr_0 = 2.6807e-04
Loss = 3.8107e-03, PNorm = 150.8465, GNorm = 0.0769, lr_0 = 2.6788e-04
Loss = 2.8686e-03, PNorm = 150.8500, GNorm = 0.3349, lr_0 = 2.6770e-04
Loss = 3.5318e-03, PNorm = 150.8580, GNorm = 0.1377, lr_0 = 2.6752e-04
Loss = 3.7225e-03, PNorm = 150.8638, GNorm = 0.1777, lr_0 = 2.6733e-04
Loss = 5.6330e-03, PNorm = 150.8701, GNorm = 0.1880, lr_0 = 2.6715e-04
Loss = 3.6099e-03, PNorm = 150.8760, GNorm = 0.1576, lr_0 = 2.6697e-04
Loss = 3.4201e-03, PNorm = 150.8872, GNorm = 0.2622, lr_0 = 2.6678e-04
Loss = 3.2521e-03, PNorm = 150.8954, GNorm = 0.0769, lr_0 = 2.6660e-04
Loss = 3.3545e-03, PNorm = 150.8997, GNorm = 0.0683, lr_0 = 2.6642e-04
Loss = 3.2453e-03, PNorm = 150.9062, GNorm = 0.1464, lr_0 = 2.6624e-04
Loss = 3.4354e-03, PNorm = 150.9113, GNorm = 0.1861, lr_0 = 2.6605e-04
Loss = 3.3082e-03, PNorm = 150.9166, GNorm = 0.2730, lr_0 = 2.6587e-04
Loss = 3.2073e-03, PNorm = 150.9241, GNorm = 0.1227, lr_0 = 2.6569e-04
Loss = 3.7031e-03, PNorm = 150.9303, GNorm = 0.0858, lr_0 = 2.6551e-04
Loss = 3.7764e-03, PNorm = 150.9370, GNorm = 0.2194, lr_0 = 2.6533e-04
Loss = 3.2618e-03, PNorm = 150.9458, GNorm = 0.1200, lr_0 = 2.6514e-04
Loss = 3.3055e-03, PNorm = 150.9487, GNorm = 0.1710, lr_0 = 2.6496e-04
Loss = 3.1787e-03, PNorm = 150.9550, GNorm = 0.3020, lr_0 = 2.6478e-04
Loss = 3.4360e-03, PNorm = 150.9574, GNorm = 0.0887, lr_0 = 2.6460e-04
Loss = 3.7102e-03, PNorm = 150.9628, GNorm = 0.1323, lr_0 = 2.6442e-04
Loss = 3.0924e-03, PNorm = 150.9694, GNorm = 0.0723, lr_0 = 2.6424e-04
Loss = 2.4808e-03, PNorm = 150.9785, GNorm = 0.0814, lr_0 = 2.6406e-04
Loss = 3.0827e-03, PNorm = 150.9871, GNorm = 0.0781, lr_0 = 2.6388e-04
Loss = 2.9192e-03, PNorm = 150.9943, GNorm = 0.1414, lr_0 = 2.6369e-04
Loss = 3.0619e-03, PNorm = 150.9992, GNorm = 0.1953, lr_0 = 2.6351e-04
Loss = 4.2838e-03, PNorm = 151.0036, GNorm = 0.0677, lr_0 = 2.6333e-04
Loss = 4.0609e-03, PNorm = 151.0089, GNorm = 0.1980, lr_0 = 2.6315e-04
Loss = 3.4946e-03, PNorm = 151.0147, GNorm = 0.1698, lr_0 = 2.6297e-04
Loss = 3.7955e-03, PNorm = 151.0244, GNorm = 0.0936, lr_0 = 2.6279e-04
Loss = 3.3779e-03, PNorm = 151.0323, GNorm = 0.1437, lr_0 = 2.6261e-04
Loss = 3.4404e-03, PNorm = 151.0352, GNorm = 0.1080, lr_0 = 2.6243e-04
Loss = 2.4918e-03, PNorm = 151.0414, GNorm = 0.1949, lr_0 = 2.6225e-04
Loss = 2.7117e-03, PNorm = 151.0472, GNorm = 0.1155, lr_0 = 2.6207e-04
Loss = 4.4212e-03, PNorm = 151.0500, GNorm = 0.1611, lr_0 = 2.6189e-04
Loss = 2.7118e-03, PNorm = 151.0560, GNorm = 0.1120, lr_0 = 2.6171e-04
Loss = 3.3839e-03, PNorm = 151.0662, GNorm = 0.2072, lr_0 = 2.6153e-04
Loss = 2.4335e-03, PNorm = 151.0730, GNorm = 0.1132, lr_0 = 2.6136e-04
Loss = 3.7688e-03, PNorm = 151.0770, GNorm = 0.1699, lr_0 = 2.6118e-04
Loss = 3.2887e-03, PNorm = 151.0848, GNorm = 0.2686, lr_0 = 2.6100e-04
Loss = 3.6097e-03, PNorm = 151.0924, GNorm = 0.0867, lr_0 = 2.6082e-04
Loss = 2.8496e-03, PNorm = 151.0975, GNorm = 0.2614, lr_0 = 2.6064e-04
Loss = 2.7142e-03, PNorm = 151.1032, GNorm = 0.1898, lr_0 = 2.6046e-04
Loss = 3.1086e-03, PNorm = 151.1080, GNorm = 0.1119, lr_0 = 2.6028e-04
Loss = 3.6544e-03, PNorm = 151.1155, GNorm = 0.1231, lr_0 = 2.6011e-04
Loss = 3.9613e-03, PNorm = 151.1222, GNorm = 0.1401, lr_0 = 2.5993e-04
Loss = 4.0194e-03, PNorm = 151.1299, GNorm = 0.1888, lr_0 = 2.5975e-04
Loss = 3.7589e-03, PNorm = 151.1356, GNorm = 0.0705, lr_0 = 2.5957e-04
Loss = 3.9554e-03, PNorm = 151.1407, GNorm = 0.1725, lr_0 = 2.5939e-04
Loss = 3.9439e-03, PNorm = 151.1509, GNorm = 0.1139, lr_0 = 2.5922e-04
Loss = 3.0510e-03, PNorm = 151.1605, GNorm = 0.0951, lr_0 = 2.5904e-04
Loss = 3.2438e-03, PNorm = 151.1671, GNorm = 0.0608, lr_0 = 2.5886e-04
Loss = 2.6116e-03, PNorm = 151.1743, GNorm = 0.1614, lr_0 = 2.5868e-04
Loss = 4.4068e-03, PNorm = 151.1799, GNorm = 0.1809, lr_0 = 2.5851e-04
Loss = 3.6125e-03, PNorm = 151.1852, GNorm = 0.1976, lr_0 = 2.5833e-04
Loss = 2.3011e-03, PNorm = 151.1910, GNorm = 0.2763, lr_0 = 2.5815e-04
Loss = 2.6368e-03, PNorm = 151.1948, GNorm = 0.0655, lr_0 = 2.5797e-04
Loss = 2.2713e-03, PNorm = 151.2012, GNorm = 0.1889, lr_0 = 2.5780e-04
Loss = 5.6876e-03, PNorm = 151.2109, GNorm = 0.1830, lr_0 = 2.5762e-04
Loss = 3.2311e-03, PNorm = 151.2179, GNorm = 0.1318, lr_0 = 2.5745e-04
Loss = 3.4225e-03, PNorm = 151.2241, GNorm = 0.2002, lr_0 = 2.5727e-04
Loss = 3.9782e-03, PNorm = 151.2279, GNorm = 0.3848, lr_0 = 2.5709e-04
Loss = 3.2322e-03, PNorm = 151.2340, GNorm = 0.2609, lr_0 = 2.5692e-04
Loss = 4.3841e-03, PNorm = 151.2403, GNorm = 0.2285, lr_0 = 2.5674e-04
Loss = 4.0132e-03, PNorm = 151.2478, GNorm = 0.0852, lr_0 = 2.5656e-04
Loss = 3.3889e-03, PNorm = 151.2553, GNorm = 0.1273, lr_0 = 2.5639e-04
Loss = 3.9748e-03, PNorm = 151.2616, GNorm = 0.1706, lr_0 = 2.5621e-04
Loss = 5.9926e-03, PNorm = 151.2665, GNorm = 0.2318, lr_0 = 2.5604e-04
Loss = 2.9031e-03, PNorm = 151.2707, GNorm = 0.1569, lr_0 = 2.5586e-04
Loss = 3.2423e-03, PNorm = 151.2775, GNorm = 0.0981, lr_0 = 2.5569e-04
Loss = 2.8239e-03, PNorm = 151.2841, GNorm = 0.1210, lr_0 = 2.5551e-04
Loss = 3.1709e-03, PNorm = 151.2914, GNorm = 0.2122, lr_0 = 2.5534e-04
Loss = 4.8878e-03, PNorm = 151.2978, GNorm = 0.1276, lr_0 = 2.5516e-04
Loss = 3.4388e-03, PNorm = 151.3078, GNorm = 0.3121, lr_0 = 2.5499e-04
Loss = 2.8746e-03, PNorm = 151.3171, GNorm = 0.2252, lr_0 = 2.5481e-04
Loss = 2.8050e-03, PNorm = 151.3248, GNorm = 0.2160, lr_0 = 2.5464e-04
Loss = 2.9475e-03, PNorm = 151.3336, GNorm = 0.1812, lr_0 = 2.5446e-04
Loss = 3.2014e-03, PNorm = 151.3409, GNorm = 0.1476, lr_0 = 2.5429e-04
Loss = 2.8365e-03, PNorm = 151.3485, GNorm = 0.1546, lr_0 = 2.5411e-04
Loss = 4.7676e-03, PNorm = 151.3547, GNorm = 0.1242, lr_0 = 2.5394e-04
Loss = 4.3286e-03, PNorm = 151.3622, GNorm = 0.3039, lr_0 = 2.5377e-04
Loss = 2.5751e-03, PNorm = 151.3694, GNorm = 0.1735, lr_0 = 2.5359e-04
Loss = 2.5570e-03, PNorm = 151.3752, GNorm = 0.1035, lr_0 = 2.5342e-04
Loss = 3.0673e-03, PNorm = 151.3842, GNorm = 0.1692, lr_0 = 2.5325e-04
Loss = 2.6184e-03, PNorm = 151.3915, GNorm = 0.1296, lr_0 = 2.5307e-04
Loss = 4.9578e-03, PNorm = 151.3950, GNorm = 0.1201, lr_0 = 2.5290e-04
Loss = 3.3457e-03, PNorm = 151.4003, GNorm = 0.2392, lr_0 = 2.5273e-04
Loss = 2.9741e-03, PNorm = 151.4048, GNorm = 0.3806, lr_0 = 2.5255e-04
Loss = 2.7264e-03, PNorm = 151.4134, GNorm = 0.3316, lr_0 = 2.5238e-04
Loss = 2.7094e-03, PNorm = 151.4234, GNorm = 0.2056, lr_0 = 2.5221e-04
Loss = 3.5062e-03, PNorm = 151.4336, GNorm = 0.2550, lr_0 = 2.5203e-04
Loss = 5.0505e-03, PNorm = 151.4394, GNorm = 0.1990, lr_0 = 2.5186e-04
Loss = 4.7284e-03, PNorm = 151.4453, GNorm = 0.4599, lr_0 = 2.5169e-04
Loss = 4.1166e-03, PNorm = 151.4545, GNorm = 0.0829, lr_0 = 2.5152e-04
Loss = 6.3484e-03, PNorm = 151.4640, GNorm = 0.1777, lr_0 = 2.5134e-04
Loss = 2.4626e-03, PNorm = 151.4728, GNorm = 0.0800, lr_0 = 2.5117e-04
Loss = 2.6303e-03, PNorm = 151.4782, GNorm = 0.1134, lr_0 = 2.5100e-04
Loss = 2.8718e-03, PNorm = 151.4856, GNorm = 0.2005, lr_0 = 2.5083e-04
Loss = 4.4937e-03, PNorm = 151.4899, GNorm = 0.3076, lr_0 = 2.5066e-04
Loss = 2.8451e-03, PNorm = 151.4963, GNorm = 0.1888, lr_0 = 2.5048e-04
Loss = 2.4823e-03, PNorm = 151.5045, GNorm = 0.2590, lr_0 = 2.5031e-04
Loss = 4.6677e-03, PNorm = 151.5136, GNorm = 0.0471, lr_0 = 2.5014e-04
Loss = 4.2773e-03, PNorm = 151.5220, GNorm = 0.0763, lr_0 = 2.4997e-04
Loss = 2.5353e-03, PNorm = 151.5302, GNorm = 0.1409, lr_0 = 2.4980e-04
Loss = 3.4569e-03, PNorm = 151.5393, GNorm = 0.0874, lr_0 = 2.4963e-04
Loss = 4.0411e-03, PNorm = 151.5480, GNorm = 0.2496, lr_0 = 2.4946e-04
Loss = 2.7585e-03, PNorm = 151.5571, GNorm = 0.2413, lr_0 = 2.4929e-04
Loss = 3.6371e-03, PNorm = 151.5617, GNorm = 0.0607, lr_0 = 2.4911e-04
Loss = 2.9192e-03, PNorm = 151.5686, GNorm = 0.1581, lr_0 = 2.4894e-04
Loss = 3.0013e-03, PNorm = 151.5770, GNorm = 0.2702, lr_0 = 2.4877e-04
Loss = 3.5353e-03, PNorm = 151.5875, GNorm = 0.0702, lr_0 = 2.4860e-04
Loss = 4.0663e-03, PNorm = 151.5964, GNorm = 0.1936, lr_0 = 2.4843e-04
Loss = 2.2154e-03, PNorm = 151.6058, GNorm = 0.0551, lr_0 = 2.4826e-04
Loss = 2.5527e-03, PNorm = 151.6139, GNorm = 0.1723, lr_0 = 2.4809e-04
Loss = 2.7932e-03, PNorm = 151.6204, GNorm = 0.1380, lr_0 = 2.4792e-04
Loss = 3.3718e-03, PNorm = 151.6272, GNorm = 0.2730, lr_0 = 2.4775e-04
Loss = 2.5946e-03, PNorm = 151.6330, GNorm = 0.1851, lr_0 = 2.4758e-04
Loss = 3.8217e-03, PNorm = 151.6387, GNorm = 0.1322, lr_0 = 2.4741e-04
Loss = 3.6987e-03, PNorm = 151.6428, GNorm = 0.2824, lr_0 = 2.4724e-04
Loss = 5.3922e-03, PNorm = 151.6480, GNorm = 0.2542, lr_0 = 2.4707e-04
Validation mae = 0.475412
Epoch 19
Loss = 2.5491e-03, PNorm = 151.6533, GNorm = 0.1199, lr_0 = 2.4690e-04
Loss = 5.9215e-03, PNorm = 151.6571, GNorm = 0.0884, lr_0 = 2.4674e-04
Loss = 3.5902e-03, PNorm = 151.6617, GNorm = 0.1514, lr_0 = 2.4657e-04
Loss = 4.3468e-03, PNorm = 151.6692, GNorm = 0.1446, lr_0 = 2.4640e-04
Loss = 3.0437e-03, PNorm = 151.6759, GNorm = 0.2688, lr_0 = 2.4623e-04
Loss = 2.6852e-03, PNorm = 151.6831, GNorm = 0.0711, lr_0 = 2.4606e-04
Loss = 2.6699e-03, PNorm = 151.6873, GNorm = 0.0394, lr_0 = 2.4589e-04
Loss = 3.0915e-03, PNorm = 151.6915, GNorm = 0.0782, lr_0 = 2.4572e-04
Loss = 3.1636e-03, PNorm = 151.6987, GNorm = 0.1863, lr_0 = 2.4556e-04
Loss = 2.2143e-03, PNorm = 151.7050, GNorm = 0.1571, lr_0 = 2.4539e-04
Loss = 1.9234e-03, PNorm = 151.7086, GNorm = 0.0670, lr_0 = 2.4522e-04
Loss = 2.3695e-03, PNorm = 151.7151, GNorm = 0.3441, lr_0 = 2.4505e-04
Loss = 3.7175e-03, PNorm = 151.7211, GNorm = 0.0920, lr_0 = 2.4488e-04
Loss = 2.7943e-03, PNorm = 151.7266, GNorm = 0.1810, lr_0 = 2.4472e-04
Loss = 2.4170e-03, PNorm = 151.7291, GNorm = 0.1122, lr_0 = 2.4455e-04
Loss = 2.5893e-03, PNorm = 151.7329, GNorm = 0.2789, lr_0 = 2.4438e-04
Loss = 2.7530e-03, PNorm = 151.7356, GNorm = 0.1890, lr_0 = 2.4421e-04
Loss = 2.7824e-03, PNorm = 151.7405, GNorm = 0.1794, lr_0 = 2.4405e-04
Loss = 2.6537e-03, PNorm = 151.7484, GNorm = 0.1826, lr_0 = 2.4388e-04
Loss = 2.5902e-03, PNorm = 151.7551, GNorm = 0.0620, lr_0 = 2.4371e-04
Loss = 2.6260e-03, PNorm = 151.7591, GNorm = 0.0640, lr_0 = 2.4354e-04
Loss = 2.8623e-03, PNorm = 151.7643, GNorm = 0.3443, lr_0 = 2.4338e-04
Loss = 2.2507e-03, PNorm = 151.7692, GNorm = 0.1747, lr_0 = 2.4321e-04
Loss = 2.1514e-03, PNorm = 151.7754, GNorm = 0.2480, lr_0 = 2.4304e-04
Loss = 2.5159e-03, PNorm = 151.7826, GNorm = 0.1132, lr_0 = 2.4288e-04
Loss = 4.9476e-03, PNorm = 151.7865, GNorm = 0.2481, lr_0 = 2.4271e-04
Loss = 2.9483e-03, PNorm = 151.7901, GNorm = 0.1592, lr_0 = 2.4254e-04
Loss = 3.7256e-03, PNorm = 151.7933, GNorm = 0.2670, lr_0 = 2.4238e-04
Loss = 4.8484e-03, PNorm = 151.7976, GNorm = 0.1359, lr_0 = 2.4221e-04
Loss = 2.9487e-03, PNorm = 151.7989, GNorm = 0.3248, lr_0 = 2.4205e-04
Loss = 2.2206e-03, PNorm = 151.8041, GNorm = 0.0760, lr_0 = 2.4188e-04
Loss = 3.1006e-03, PNorm = 151.8094, GNorm = 0.0728, lr_0 = 2.4171e-04
Loss = 2.6434e-03, PNorm = 151.8134, GNorm = 0.2593, lr_0 = 2.4155e-04
Loss = 3.2273e-03, PNorm = 151.8185, GNorm = 0.2220, lr_0 = 2.4138e-04
Loss = 2.0906e-03, PNorm = 151.8256, GNorm = 0.1873, lr_0 = 2.4122e-04
Loss = 2.0501e-03, PNorm = 151.8310, GNorm = 0.0844, lr_0 = 2.4105e-04
Loss = 2.8131e-03, PNorm = 151.8377, GNorm = 0.1807, lr_0 = 2.4089e-04
Loss = 3.6221e-03, PNorm = 151.8412, GNorm = 0.1555, lr_0 = 2.4072e-04
Loss = 2.8996e-03, PNorm = 151.8449, GNorm = 0.0871, lr_0 = 2.4056e-04
Loss = 2.7548e-03, PNorm = 151.8475, GNorm = 0.2124, lr_0 = 2.4039e-04
Loss = 4.5607e-03, PNorm = 151.8528, GNorm = 0.1933, lr_0 = 2.4023e-04
Loss = 2.4788e-03, PNorm = 151.8598, GNorm = 0.1004, lr_0 = 2.4006e-04
Loss = 3.3376e-03, PNorm = 151.8661, GNorm = 0.1324, lr_0 = 2.3990e-04
Loss = 2.4799e-03, PNorm = 151.8733, GNorm = 0.1076, lr_0 = 2.3974e-04
Loss = 2.8254e-03, PNorm = 151.8799, GNorm = 0.0734, lr_0 = 2.3957e-04
Loss = 2.7028e-03, PNorm = 151.8855, GNorm = 0.2360, lr_0 = 2.3941e-04
Loss = 5.6337e-03, PNorm = 151.8885, GNorm = 0.4913, lr_0 = 2.3924e-04
Loss = 3.2704e-03, PNorm = 151.8919, GNorm = 0.1122, lr_0 = 2.3908e-04
Loss = 2.2957e-03, PNorm = 151.8952, GNorm = 0.0829, lr_0 = 2.3892e-04
Loss = 3.7340e-03, PNorm = 151.9017, GNorm = 0.0852, lr_0 = 2.3875e-04
Loss = 3.5829e-03, PNorm = 151.9077, GNorm = 0.1435, lr_0 = 2.3859e-04
Loss = 2.1977e-03, PNorm = 151.9131, GNorm = 0.0839, lr_0 = 2.3842e-04
Loss = 2.6364e-03, PNorm = 151.9201, GNorm = 0.0630, lr_0 = 2.3826e-04
Loss = 3.7501e-03, PNorm = 151.9298, GNorm = 0.1349, lr_0 = 2.3810e-04
Loss = 2.4599e-03, PNorm = 151.9359, GNorm = 0.1933, lr_0 = 2.3794e-04
Loss = 3.4087e-03, PNorm = 151.9403, GNorm = 0.1172, lr_0 = 2.3777e-04
Loss = 2.8844e-03, PNorm = 151.9471, GNorm = 0.1557, lr_0 = 2.3761e-04
Loss = 2.2974e-03, PNorm = 151.9547, GNorm = 0.1811, lr_0 = 2.3745e-04
Loss = 6.0954e-03, PNorm = 151.9594, GNorm = 0.2520, lr_0 = 2.3728e-04
Loss = 2.2184e-03, PNorm = 151.9653, GNorm = 0.0797, lr_0 = 2.3712e-04
Loss = 3.6495e-03, PNorm = 151.9727, GNorm = 0.1103, lr_0 = 2.3696e-04
Loss = 2.7696e-03, PNorm = 151.9789, GNorm = 0.0532, lr_0 = 2.3680e-04
Loss = 2.3243e-03, PNorm = 151.9833, GNorm = 0.1731, lr_0 = 2.3663e-04
Loss = 3.0209e-03, PNorm = 151.9873, GNorm = 0.2697, lr_0 = 2.3647e-04
Loss = 2.3416e-03, PNorm = 151.9919, GNorm = 0.1732, lr_0 = 2.3631e-04
Loss = 2.2285e-03, PNorm = 151.9991, GNorm = 0.1774, lr_0 = 2.3615e-04
Loss = 2.8469e-03, PNorm = 152.0069, GNorm = 0.2166, lr_0 = 2.3599e-04
Loss = 2.7197e-03, PNorm = 152.0143, GNorm = 0.1350, lr_0 = 2.3582e-04
Loss = 3.0756e-03, PNorm = 152.0182, GNorm = 0.1858, lr_0 = 2.3566e-04
Loss = 2.1808e-03, PNorm = 152.0225, GNorm = 0.0835, lr_0 = 2.3550e-04
Loss = 3.7262e-03, PNorm = 152.0296, GNorm = 0.2166, lr_0 = 2.3534e-04
Loss = 4.1038e-03, PNorm = 152.0310, GNorm = 0.2642, lr_0 = 2.3518e-04
Loss = 3.1273e-03, PNorm = 152.0367, GNorm = 0.0850, lr_0 = 2.3502e-04
Loss = 3.0054e-03, PNorm = 152.0428, GNorm = 0.1311, lr_0 = 2.3486e-04
Loss = 4.1682e-03, PNorm = 152.0494, GNorm = 0.1318, lr_0 = 2.3470e-04
Loss = 4.3811e-03, PNorm = 152.0582, GNorm = 0.1995, lr_0 = 2.3454e-04
Loss = 2.4555e-03, PNorm = 152.0650, GNorm = 0.3203, lr_0 = 2.3437e-04
Loss = 2.5323e-03, PNorm = 152.0686, GNorm = 0.1289, lr_0 = 2.3421e-04
Loss = 2.7947e-03, PNorm = 152.0761, GNorm = 0.1603, lr_0 = 2.3405e-04
Loss = 2.6976e-03, PNorm = 152.0861, GNorm = 0.2822, lr_0 = 2.3389e-04
Loss = 5.2959e-03, PNorm = 152.0945, GNorm = 0.7569, lr_0 = 2.3373e-04
Loss = 3.1409e-03, PNorm = 152.0974, GNorm = 0.3032, lr_0 = 2.3357e-04
Loss = 2.6594e-03, PNorm = 152.1036, GNorm = 0.1105, lr_0 = 2.3341e-04
Loss = 2.8678e-03, PNorm = 152.1092, GNorm = 0.1665, lr_0 = 2.3325e-04
Loss = 2.8734e-03, PNorm = 152.1146, GNorm = 0.1012, lr_0 = 2.3309e-04
Loss = 2.1005e-03, PNorm = 152.1196, GNorm = 0.1381, lr_0 = 2.3293e-04
Loss = 5.2484e-03, PNorm = 152.1221, GNorm = 0.5553, lr_0 = 2.3277e-04
Loss = 2.0849e-03, PNorm = 152.1293, GNorm = 0.1437, lr_0 = 2.3261e-04
Loss = 2.1859e-03, PNorm = 152.1325, GNorm = 0.1648, lr_0 = 2.3246e-04
Loss = 3.2958e-03, PNorm = 152.1364, GNorm = 0.1763, lr_0 = 2.3230e-04
Loss = 3.1870e-03, PNorm = 152.1437, GNorm = 0.1994, lr_0 = 2.3214e-04
Loss = 2.3276e-03, PNorm = 152.1507, GNorm = 0.1196, lr_0 = 2.3198e-04
Loss = 3.8808e-03, PNorm = 152.1556, GNorm = 0.2279, lr_0 = 2.3182e-04
Loss = 2.5692e-03, PNorm = 152.1621, GNorm = 0.0558, lr_0 = 2.3166e-04
Loss = 2.0465e-03, PNorm = 152.1685, GNorm = 0.0548, lr_0 = 2.3150e-04
Loss = 4.6771e-03, PNorm = 152.1755, GNorm = 0.2409, lr_0 = 2.3134e-04
Loss = 3.4849e-03, PNorm = 152.1803, GNorm = 0.1692, lr_0 = 2.3118e-04
Loss = 3.2902e-03, PNorm = 152.1866, GNorm = 0.1127, lr_0 = 2.3103e-04
Loss = 2.0196e-03, PNorm = 152.1955, GNorm = 0.1245, lr_0 = 2.3087e-04
Loss = 3.1533e-03, PNorm = 152.2025, GNorm = 0.2068, lr_0 = 2.3071e-04
Loss = 2.2108e-03, PNorm = 152.2116, GNorm = 0.0809, lr_0 = 2.3055e-04
Loss = 3.5620e-03, PNorm = 152.2177, GNorm = 0.2526, lr_0 = 2.3039e-04
Loss = 3.2463e-03, PNorm = 152.2217, GNorm = 0.0547, lr_0 = 2.3024e-04
Loss = 2.0656e-03, PNorm = 152.2282, GNorm = 0.2538, lr_0 = 2.3008e-04
Loss = 2.6572e-03, PNorm = 152.2360, GNorm = 0.2474, lr_0 = 2.2992e-04
Loss = 1.9422e-03, PNorm = 152.2437, GNorm = 0.1633, lr_0 = 2.2976e-04
Loss = 3.0895e-03, PNorm = 152.2492, GNorm = 0.1571, lr_0 = 2.2961e-04
Loss = 2.4457e-03, PNorm = 152.2546, GNorm = 0.1474, lr_0 = 2.2945e-04
Loss = 2.6380e-03, PNorm = 152.2627, GNorm = 0.1656, lr_0 = 2.2929e-04
Loss = 2.4110e-03, PNorm = 152.2678, GNorm = 0.1805, lr_0 = 2.2913e-04
Loss = 2.6925e-03, PNorm = 152.2705, GNorm = 0.1433, lr_0 = 2.2898e-04
Loss = 3.0078e-03, PNorm = 152.2734, GNorm = 0.1695, lr_0 = 2.2882e-04
Loss = 3.7771e-03, PNorm = 152.2786, GNorm = 0.1119, lr_0 = 2.2866e-04
Loss = 4.3134e-03, PNorm = 152.2832, GNorm = 0.1759, lr_0 = 2.2851e-04
Loss = 2.3555e-03, PNorm = 152.2906, GNorm = 0.1255, lr_0 = 2.2835e-04
Loss = 2.6807e-03, PNorm = 152.2976, GNorm = 0.1335, lr_0 = 2.2819e-04
Loss = 2.0735e-03, PNorm = 152.3020, GNorm = 0.2606, lr_0 = 2.2804e-04
Loss = 3.3415e-03, PNorm = 152.3078, GNorm = 0.0675, lr_0 = 2.2788e-04
Loss = 4.3074e-03, PNorm = 152.3137, GNorm = 0.1980, lr_0 = 2.2773e-04
Loss = 3.2055e-03, PNorm = 152.3190, GNorm = 0.2105, lr_0 = 2.2757e-04
Validation mae = 0.474679
Epoch 20
Loss = 2.9887e-03, PNorm = 152.3245, GNorm = 0.0865, lr_0 = 2.2741e-04
Loss = 1.8686e-03, PNorm = 152.3297, GNorm = 0.0985, lr_0 = 2.2726e-04
Loss = 2.4017e-03, PNorm = 152.3330, GNorm = 0.2256, lr_0 = 2.2710e-04
Loss = 3.5433e-03, PNorm = 152.3364, GNorm = 0.2037, lr_0 = 2.2695e-04
Loss = 3.0691e-03, PNorm = 152.3434, GNorm = 0.3397, lr_0 = 2.2679e-04
Loss = 3.5645e-03, PNorm = 152.3481, GNorm = 0.2658, lr_0 = 2.2664e-04
Loss = 2.1926e-03, PNorm = 152.3518, GNorm = 0.4153, lr_0 = 2.2648e-04
Loss = 2.3927e-03, PNorm = 152.3543, GNorm = 0.1095, lr_0 = 2.2632e-04
Loss = 5.3795e-03, PNorm = 152.3592, GNorm = 0.1017, lr_0 = 2.2617e-04
Loss = 2.2208e-03, PNorm = 152.3647, GNorm = 0.1960, lr_0 = 2.2601e-04
Loss = 2.2693e-03, PNorm = 152.3695, GNorm = 0.1198, lr_0 = 2.2586e-04
Loss = 2.4921e-03, PNorm = 152.3719, GNorm = 0.1900, lr_0 = 2.2571e-04
Loss = 2.1249e-03, PNorm = 152.3748, GNorm = 0.1162, lr_0 = 2.2555e-04
Loss = 2.1516e-03, PNorm = 152.3804, GNorm = 0.1535, lr_0 = 2.2540e-04
Loss = 2.3285e-03, PNorm = 152.3885, GNorm = 0.2192, lr_0 = 2.2524e-04
Loss = 2.1259e-03, PNorm = 152.3943, GNorm = 0.2606, lr_0 = 2.2509e-04
Loss = 3.7061e-03, PNorm = 152.3996, GNorm = 0.0704, lr_0 = 2.2493e-04
Loss = 2.1537e-03, PNorm = 152.4021, GNorm = 0.1587, lr_0 = 2.2478e-04
Loss = 2.8095e-03, PNorm = 152.4023, GNorm = 0.3287, lr_0 = 2.2463e-04
Loss = 2.1299e-03, PNorm = 152.4067, GNorm = 0.1504, lr_0 = 2.2447e-04
Loss = 2.3098e-03, PNorm = 152.4123, GNorm = 0.1460, lr_0 = 2.2432e-04
Loss = 2.3813e-03, PNorm = 152.4194, GNorm = 0.0807, lr_0 = 2.2416e-04
Loss = 2.9518e-03, PNorm = 152.4254, GNorm = 0.1742, lr_0 = 2.2401e-04
Loss = 3.2622e-03, PNorm = 152.4292, GNorm = 0.1056, lr_0 = 2.2386e-04
Loss = 1.8792e-03, PNorm = 152.4317, GNorm = 0.1056, lr_0 = 2.2370e-04
Loss = 3.5674e-03, PNorm = 152.4346, GNorm = 0.0716, lr_0 = 2.2355e-04
Loss = 2.4536e-03, PNorm = 152.4388, GNorm = 0.2745, lr_0 = 2.2340e-04
Loss = 2.3676e-03, PNorm = 152.4448, GNorm = 0.1093, lr_0 = 2.2324e-04
Loss = 2.2463e-03, PNorm = 152.4507, GNorm = 0.1727, lr_0 = 2.2309e-04
Loss = 2.7091e-03, PNorm = 152.4565, GNorm = 0.2701, lr_0 = 2.2294e-04
Loss = 1.9901e-03, PNorm = 152.4612, GNorm = 0.1423, lr_0 = 2.2279e-04
Loss = 3.6119e-03, PNorm = 152.4648, GNorm = 0.0677, lr_0 = 2.2263e-04
Loss = 2.0230e-03, PNorm = 152.4701, GNorm = 0.1269, lr_0 = 2.2248e-04
Loss = 2.1838e-03, PNorm = 152.4741, GNorm = 0.2516, lr_0 = 2.2233e-04
Loss = 3.4153e-03, PNorm = 152.4774, GNorm = 0.1241, lr_0 = 2.2218e-04
Loss = 2.3025e-03, PNorm = 152.4843, GNorm = 0.2908, lr_0 = 2.2202e-04
Loss = 2.4338e-03, PNorm = 152.4887, GNorm = 0.2233, lr_0 = 2.2187e-04
Loss = 2.1517e-03, PNorm = 152.4926, GNorm = 0.1198, lr_0 = 2.2172e-04
Loss = 4.6659e-03, PNorm = 152.4955, GNorm = 0.1124, lr_0 = 2.2157e-04
Loss = 2.1085e-03, PNorm = 152.4996, GNorm = 0.1579, lr_0 = 2.2142e-04
Loss = 2.2075e-03, PNorm = 152.5060, GNorm = 0.2757, lr_0 = 2.2126e-04
Loss = 4.0689e-03, PNorm = 152.5096, GNorm = 0.0836, lr_0 = 2.2111e-04
Loss = 2.1015e-03, PNorm = 152.5140, GNorm = 0.0869, lr_0 = 2.2096e-04
Loss = 1.7997e-03, PNorm = 152.5179, GNorm = 0.0977, lr_0 = 2.2081e-04
Loss = 4.4519e-03, PNorm = 152.5232, GNorm = 0.0532, lr_0 = 2.2066e-04
Loss = 3.0890e-03, PNorm = 152.5291, GNorm = 0.1219, lr_0 = 2.2051e-04
Loss = 2.5183e-03, PNorm = 152.5342, GNorm = 0.1823, lr_0 = 2.2036e-04
Loss = 2.0086e-03, PNorm = 152.5382, GNorm = 0.1877, lr_0 = 2.2021e-04
Loss = 2.5539e-03, PNorm = 152.5421, GNorm = 0.1122, lr_0 = 2.2005e-04
Loss = 2.2672e-03, PNorm = 152.5469, GNorm = 0.0930, lr_0 = 2.1990e-04
Loss = 3.3526e-03, PNorm = 152.5511, GNorm = 0.3421, lr_0 = 2.1975e-04
Loss = 2.9299e-03, PNorm = 152.5539, GNorm = 0.2487, lr_0 = 2.1960e-04
Loss = 1.9696e-03, PNorm = 152.5560, GNorm = 0.1693, lr_0 = 2.1945e-04
Loss = 3.0371e-03, PNorm = 152.5632, GNorm = 0.1380, lr_0 = 2.1930e-04
Loss = 2.5227e-03, PNorm = 152.5694, GNorm = 0.1971, lr_0 = 2.1915e-04
Loss = 3.4555e-03, PNorm = 152.5747, GNorm = 0.1428, lr_0 = 2.1900e-04
Loss = 2.8472e-03, PNorm = 152.5806, GNorm = 0.1887, lr_0 = 2.1885e-04
Loss = 2.0970e-03, PNorm = 152.5859, GNorm = 0.0971, lr_0 = 2.1870e-04
Loss = 2.6729e-03, PNorm = 152.5929, GNorm = 0.1299, lr_0 = 2.1855e-04
Loss = 2.1722e-03, PNorm = 152.5991, GNorm = 0.1191, lr_0 = 2.1840e-04
Loss = 3.1228e-03, PNorm = 152.6036, GNorm = 0.1415, lr_0 = 2.1825e-04
Loss = 2.2889e-03, PNorm = 152.6081, GNorm = 0.1097, lr_0 = 2.1810e-04
Loss = 2.5666e-03, PNorm = 152.6084, GNorm = 0.1891, lr_0 = 2.1795e-04
Loss = 2.6394e-03, PNorm = 152.6122, GNorm = 0.0959, lr_0 = 2.1780e-04
Loss = 2.2141e-03, PNorm = 152.6158, GNorm = 0.0326, lr_0 = 2.1765e-04
Loss = 2.3717e-03, PNorm = 152.6238, GNorm = 0.1017, lr_0 = 2.1751e-04
Loss = 1.9934e-03, PNorm = 152.6296, GNorm = 0.1310, lr_0 = 2.1736e-04
Loss = 2.6109e-03, PNorm = 152.6373, GNorm = 0.0529, lr_0 = 2.1721e-04
Loss = 2.1060e-03, PNorm = 152.6427, GNorm = 0.3231, lr_0 = 2.1706e-04
Loss = 2.0757e-03, PNorm = 152.6488, GNorm = 0.0970, lr_0 = 2.1691e-04
Loss = 2.6498e-03, PNorm = 152.6537, GNorm = 0.1250, lr_0 = 2.1676e-04
Loss = 3.0432e-03, PNorm = 152.6574, GNorm = 0.2845, lr_0 = 2.1661e-04
Loss = 4.4400e-03, PNorm = 152.6611, GNorm = 0.1451, lr_0 = 2.1646e-04
Loss = 2.8952e-03, PNorm = 152.6668, GNorm = 0.1872, lr_0 = 2.1632e-04
Loss = 3.0332e-03, PNorm = 152.6735, GNorm = 0.1596, lr_0 = 2.1617e-04
Loss = 2.6218e-03, PNorm = 152.6788, GNorm = 0.2261, lr_0 = 2.1602e-04
Loss = 1.9878e-03, PNorm = 152.6861, GNorm = 0.2032, lr_0 = 2.1587e-04
Loss = 2.9586e-03, PNorm = 152.6911, GNorm = 0.1113, lr_0 = 2.1572e-04
Loss = 3.7219e-03, PNorm = 152.6966, GNorm = 0.1642, lr_0 = 2.1558e-04
Loss = 2.1356e-03, PNorm = 152.7029, GNorm = 0.1119, lr_0 = 2.1543e-04
Loss = 3.2844e-03, PNorm = 152.7093, GNorm = 0.0573, lr_0 = 2.1528e-04
Loss = 2.2745e-03, PNorm = 152.7162, GNorm = 0.0980, lr_0 = 2.1513e-04
Loss = 1.9968e-03, PNorm = 152.7220, GNorm = 0.4340, lr_0 = 2.1499e-04
Loss = 2.6041e-03, PNorm = 152.7269, GNorm = 0.0998, lr_0 = 2.1484e-04
Loss = 2.2054e-03, PNorm = 152.7337, GNorm = 0.1670, lr_0 = 2.1469e-04
Loss = 1.9288e-03, PNorm = 152.7384, GNorm = 0.1584, lr_0 = 2.1454e-04
Loss = 2.0595e-03, PNorm = 152.7441, GNorm = 0.1230, lr_0 = 2.1440e-04
Loss = 1.9010e-03, PNorm = 152.7456, GNorm = 0.1795, lr_0 = 2.1425e-04
Loss = 3.8744e-03, PNorm = 152.7503, GNorm = 0.0648, lr_0 = 2.1410e-04
Loss = 2.6493e-03, PNorm = 152.7538, GNorm = 0.1721, lr_0 = 2.1396e-04
Loss = 2.9875e-03, PNorm = 152.7578, GNorm = 0.1377, lr_0 = 2.1381e-04
Loss = 3.4910e-03, PNorm = 152.7615, GNorm = 0.0841, lr_0 = 2.1366e-04
Loss = 1.7171e-03, PNorm = 152.7673, GNorm = 0.0623, lr_0 = 2.1352e-04
Loss = 3.0959e-03, PNorm = 152.7702, GNorm = 0.2116, lr_0 = 2.1337e-04
Loss = 2.6164e-03, PNorm = 152.7741, GNorm = 0.0854, lr_0 = 2.1323e-04
Loss = 1.8617e-03, PNorm = 152.7798, GNorm = 0.1026, lr_0 = 2.1308e-04
Loss = 5.9520e-03, PNorm = 152.7826, GNorm = 0.1702, lr_0 = 2.1293e-04
Loss = 2.9725e-03, PNorm = 152.7867, GNorm = 0.1382, lr_0 = 2.1279e-04
Loss = 2.6163e-03, PNorm = 152.7931, GNorm = 0.0986, lr_0 = 2.1264e-04
Loss = 3.8217e-03, PNorm = 152.7972, GNorm = 0.1721, lr_0 = 2.1250e-04
Loss = 2.1510e-03, PNorm = 152.8023, GNorm = 0.1901, lr_0 = 2.1235e-04
Loss = 2.8996e-03, PNorm = 152.8080, GNorm = 0.1916, lr_0 = 2.1221e-04
Loss = 2.2824e-03, PNorm = 152.8137, GNorm = 0.0693, lr_0 = 2.1206e-04
Loss = 2.4787e-03, PNorm = 152.8207, GNorm = 0.2312, lr_0 = 2.1191e-04
Loss = 2.2866e-03, PNorm = 152.8259, GNorm = 0.0699, lr_0 = 2.1177e-04
Loss = 3.9979e-03, PNorm = 152.8315, GNorm = 0.0943, lr_0 = 2.1162e-04
Loss = 1.4637e-03, PNorm = 152.8365, GNorm = 0.1444, lr_0 = 2.1148e-04
Loss = 3.8437e-03, PNorm = 152.8413, GNorm = 0.1085, lr_0 = 2.1133e-04
Loss = 4.5244e-03, PNorm = 152.8446, GNorm = 0.3022, lr_0 = 2.1119e-04
Loss = 2.3214e-03, PNorm = 152.8497, GNorm = 0.0555, lr_0 = 2.1104e-04
Loss = 1.7934e-03, PNorm = 152.8553, GNorm = 0.0807, lr_0 = 2.1090e-04
Loss = 2.1874e-03, PNorm = 152.8615, GNorm = 0.3154, lr_0 = 2.1076e-04
Loss = 1.7614e-03, PNorm = 152.8647, GNorm = 0.1698, lr_0 = 2.1061e-04
Loss = 2.9467e-03, PNorm = 152.8668, GNorm = 0.2462, lr_0 = 2.1047e-04
Loss = 1.9731e-03, PNorm = 152.8715, GNorm = 0.1480, lr_0 = 2.1032e-04
Loss = 4.0832e-03, PNorm = 152.8763, GNorm = 0.1654, lr_0 = 2.1018e-04
Loss = 3.3562e-03, PNorm = 152.8801, GNorm = 0.1493, lr_0 = 2.1003e-04
Loss = 2.8795e-03, PNorm = 152.8850, GNorm = 0.0428, lr_0 = 2.0989e-04
Loss = 3.9081e-03, PNorm = 152.8942, GNorm = 0.1643, lr_0 = 2.0975e-04
Loss = 3.6556e-03, PNorm = 152.9007, GNorm = 0.0962, lr_0 = 2.0960e-04
Validation mae = 0.475298
Epoch 21
Loss = 1.5231e-03, PNorm = 152.9065, GNorm = 0.1122, lr_0 = 2.0946e-04
Loss = 2.4766e-03, PNorm = 152.9086, GNorm = 0.2777, lr_0 = 2.0932e-04
Loss = 1.7393e-03, PNorm = 152.9135, GNorm = 0.1636, lr_0 = 2.0917e-04
Loss = 1.9574e-03, PNorm = 152.9164, GNorm = 0.1328, lr_0 = 2.0903e-04
Loss = 2.4192e-03, PNorm = 152.9205, GNorm = 0.2238, lr_0 = 2.0889e-04
Loss = 2.1847e-03, PNorm = 152.9245, GNorm = 0.1686, lr_0 = 2.0874e-04
Loss = 1.6951e-03, PNorm = 152.9272, GNorm = 0.1023, lr_0 = 2.0860e-04
Loss = 2.4229e-03, PNorm = 152.9293, GNorm = 0.1256, lr_0 = 2.0846e-04
Loss = 1.8630e-03, PNorm = 152.9353, GNorm = 0.0700, lr_0 = 2.0831e-04
Loss = 2.6646e-03, PNorm = 152.9401, GNorm = 0.1655, lr_0 = 2.0817e-04
Loss = 1.4855e-03, PNorm = 152.9438, GNorm = 0.0614, lr_0 = 2.0803e-04
Loss = 2.8829e-03, PNorm = 152.9465, GNorm = 0.1573, lr_0 = 2.0789e-04
Loss = 1.7761e-03, PNorm = 152.9480, GNorm = 0.2110, lr_0 = 2.0774e-04
Loss = 1.8800e-03, PNorm = 152.9512, GNorm = 0.1839, lr_0 = 2.0760e-04
Loss = 1.9513e-03, PNorm = 152.9543, GNorm = 0.1862, lr_0 = 2.0746e-04
Loss = 2.3018e-03, PNorm = 152.9607, GNorm = 0.1626, lr_0 = 2.0732e-04
Loss = 3.0727e-03, PNorm = 152.9655, GNorm = 0.0553, lr_0 = 2.0718e-04
Loss = 4.4612e-03, PNorm = 152.9696, GNorm = 0.1138, lr_0 = 2.0703e-04
Loss = 1.5631e-03, PNorm = 152.9726, GNorm = 0.0452, lr_0 = 2.0689e-04
Loss = 2.4133e-03, PNorm = 152.9774, GNorm = 0.1151, lr_0 = 2.0675e-04
Loss = 2.7552e-03, PNorm = 152.9823, GNorm = 0.1409, lr_0 = 2.0661e-04
Loss = 4.1662e-03, PNorm = 152.9851, GNorm = 0.4227, lr_0 = 2.0647e-04
Loss = 2.6551e-03, PNorm = 152.9883, GNorm = 0.2224, lr_0 = 2.0633e-04
Loss = 1.8335e-03, PNorm = 152.9937, GNorm = 0.1540, lr_0 = 2.0618e-04
Loss = 3.5031e-03, PNorm = 152.9991, GNorm = 0.3825, lr_0 = 2.0604e-04
Loss = 2.0272e-03, PNorm = 153.0040, GNorm = 0.1016, lr_0 = 2.0590e-04
Loss = 1.7860e-03, PNorm = 153.0069, GNorm = 0.0681, lr_0 = 2.0576e-04
Loss = 2.5550e-03, PNorm = 153.0081, GNorm = 0.2107, lr_0 = 2.0562e-04
Loss = 1.9581e-03, PNorm = 153.0109, GNorm = 0.1242, lr_0 = 2.0548e-04
Loss = 1.6235e-03, PNorm = 153.0154, GNorm = 0.1200, lr_0 = 2.0534e-04
Loss = 2.1488e-03, PNorm = 153.0184, GNorm = 0.2280, lr_0 = 2.0520e-04
Loss = 1.6199e-03, PNorm = 153.0181, GNorm = 0.0974, lr_0 = 2.0506e-04
Loss = 2.2738e-03, PNorm = 153.0213, GNorm = 0.1063, lr_0 = 2.0492e-04
Loss = 1.8825e-03, PNorm = 153.0264, GNorm = 0.3266, lr_0 = 2.0478e-04
Loss = 2.1936e-03, PNorm = 153.0287, GNorm = 0.0335, lr_0 = 2.0464e-04
Loss = 1.6929e-03, PNorm = 153.0304, GNorm = 0.0577, lr_0 = 2.0450e-04
Loss = 2.4865e-03, PNorm = 153.0338, GNorm = 0.1392, lr_0 = 2.0436e-04
Loss = 1.5625e-03, PNorm = 153.0371, GNorm = 0.0986, lr_0 = 2.0422e-04
Loss = 1.3775e-03, PNorm = 153.0413, GNorm = 0.1995, lr_0 = 2.0408e-04
Loss = 2.3269e-03, PNorm = 153.0449, GNorm = 0.1917, lr_0 = 2.0394e-04
Loss = 1.5825e-03, PNorm = 153.0504, GNorm = 0.0598, lr_0 = 2.0380e-04
Loss = 2.2789e-03, PNorm = 153.0524, GNorm = 0.1286, lr_0 = 2.0366e-04
Loss = 1.7463e-03, PNorm = 153.0596, GNorm = 0.0987, lr_0 = 2.0352e-04
Loss = 1.9343e-03, PNorm = 153.0655, GNorm = 0.0915, lr_0 = 2.0338e-04
Loss = 1.7867e-03, PNorm = 153.0682, GNorm = 0.0517, lr_0 = 2.0324e-04
Loss = 1.6733e-03, PNorm = 153.0701, GNorm = 0.0709, lr_0 = 2.0310e-04
Loss = 2.5362e-03, PNorm = 153.0740, GNorm = 0.0810, lr_0 = 2.0296e-04
Loss = 1.9847e-03, PNorm = 153.0779, GNorm = 0.0509, lr_0 = 2.0282e-04
Loss = 2.0852e-03, PNorm = 153.0821, GNorm = 0.2269, lr_0 = 2.0268e-04
Loss = 1.7142e-03, PNorm = 153.0867, GNorm = 0.0962, lr_0 = 2.0254e-04
Loss = 2.2967e-03, PNorm = 153.0880, GNorm = 0.0920, lr_0 = 2.0240e-04
Loss = 2.0128e-03, PNorm = 153.0931, GNorm = 0.0999, lr_0 = 2.0227e-04
Loss = 1.5618e-03, PNorm = 153.0976, GNorm = 0.1084, lr_0 = 2.0213e-04
Loss = 2.6335e-03, PNorm = 153.1017, GNorm = 0.1294, lr_0 = 2.0199e-04
Loss = 3.3304e-03, PNorm = 153.1078, GNorm = 0.0904, lr_0 = 2.0185e-04
Loss = 4.2875e-03, PNorm = 153.1127, GNorm = 0.1801, lr_0 = 2.0171e-04
Loss = 2.0850e-03, PNorm = 153.1134, GNorm = 0.1896, lr_0 = 2.0157e-04
Loss = 2.4485e-03, PNorm = 153.1179, GNorm = 0.2342, lr_0 = 2.0144e-04
Loss = 2.3854e-03, PNorm = 153.1213, GNorm = 0.1204, lr_0 = 2.0130e-04
Loss = 1.6492e-03, PNorm = 153.1271, GNorm = 0.1017, lr_0 = 2.0116e-04
Loss = 1.9513e-03, PNorm = 153.1306, GNorm = 0.0644, lr_0 = 2.0102e-04
Loss = 2.3767e-03, PNorm = 153.1368, GNorm = 0.1052, lr_0 = 2.0088e-04
Loss = 1.8286e-03, PNorm = 153.1404, GNorm = 0.0684, lr_0 = 2.0075e-04
Loss = 1.4716e-03, PNorm = 153.1434, GNorm = 0.0794, lr_0 = 2.0061e-04
Loss = 2.6349e-03, PNorm = 153.1469, GNorm = 0.0761, lr_0 = 2.0047e-04
Loss = 4.1425e-03, PNorm = 153.1507, GNorm = 0.3976, lr_0 = 2.0033e-04
Loss = 2.2319e-03, PNorm = 153.1553, GNorm = 0.1446, lr_0 = 2.0020e-04
Loss = 2.1445e-03, PNorm = 153.1626, GNorm = 0.0702, lr_0 = 2.0006e-04
Loss = 4.1256e-03, PNorm = 153.1676, GNorm = 0.3553, lr_0 = 1.9992e-04
Loss = 2.8803e-03, PNorm = 153.1721, GNorm = 0.2585, lr_0 = 1.9979e-04
Loss = 1.8482e-03, PNorm = 153.1729, GNorm = 0.1034, lr_0 = 1.9965e-04
Loss = 2.4741e-03, PNorm = 153.1773, GNorm = 0.0913, lr_0 = 1.9951e-04
Loss = 2.1281e-03, PNorm = 153.1839, GNorm = 0.2666, lr_0 = 1.9938e-04
Loss = 3.1858e-03, PNorm = 153.1903, GNorm = 0.2164, lr_0 = 1.9924e-04
Loss = 1.6663e-03, PNorm = 153.1963, GNorm = 0.0699, lr_0 = 1.9910e-04
Loss = 1.8830e-03, PNorm = 153.2000, GNorm = 0.1013, lr_0 = 1.9897e-04
Loss = 1.8446e-03, PNorm = 153.2024, GNorm = 0.1380, lr_0 = 1.9883e-04
Loss = 3.2142e-03, PNorm = 153.2058, GNorm = 0.2651, lr_0 = 1.9869e-04
Loss = 4.9111e-03, PNorm = 153.2121, GNorm = 0.1414, lr_0 = 1.9856e-04
Loss = 1.6761e-03, PNorm = 153.2168, GNorm = 0.0947, lr_0 = 1.9842e-04
Loss = 1.7970e-03, PNorm = 153.2217, GNorm = 0.1380, lr_0 = 1.9829e-04
Loss = 2.1957e-03, PNorm = 153.2269, GNorm = 0.2031, lr_0 = 1.9815e-04
Loss = 2.1083e-03, PNorm = 153.2317, GNorm = 0.1120, lr_0 = 1.9801e-04
Loss = 4.2871e-03, PNorm = 153.2358, GNorm = 0.4205, lr_0 = 1.9788e-04
Loss = 1.9350e-03, PNorm = 153.2405, GNorm = 0.0711, lr_0 = 1.9774e-04
Loss = 1.5441e-03, PNorm = 153.2460, GNorm = 0.0667, lr_0 = 1.9761e-04
Loss = 1.2312e-03, PNorm = 153.2509, GNorm = 0.0449, lr_0 = 1.9747e-04
Loss = 1.7986e-03, PNorm = 153.2540, GNorm = 0.0617, lr_0 = 1.9734e-04
Loss = 2.6557e-03, PNorm = 153.2566, GNorm = 0.2097, lr_0 = 1.9720e-04
Loss = 1.8476e-03, PNorm = 153.2610, GNorm = 0.1202, lr_0 = 1.9707e-04
Loss = 4.1579e-03, PNorm = 153.2662, GNorm = 0.1987, lr_0 = 1.9693e-04
Loss = 3.8967e-03, PNorm = 153.2699, GNorm = 0.4819, lr_0 = 1.9680e-04
Loss = 2.2483e-03, PNorm = 153.2722, GNorm = 0.1595, lr_0 = 1.9666e-04
Loss = 4.4143e-03, PNorm = 153.2769, GNorm = 0.2205, lr_0 = 1.9653e-04
Loss = 1.8658e-03, PNorm = 153.2794, GNorm = 0.1908, lr_0 = 1.9639e-04
Loss = 2.4386e-03, PNorm = 153.2837, GNorm = 0.1412, lr_0 = 1.9626e-04
Loss = 1.9466e-03, PNorm = 153.2879, GNorm = 0.0564, lr_0 = 1.9612e-04
Loss = 2.0575e-03, PNorm = 153.2898, GNorm = 0.0763, lr_0 = 1.9599e-04
Loss = 3.9277e-03, PNorm = 153.2944, GNorm = 0.1990, lr_0 = 1.9585e-04
Loss = 3.1699e-03, PNorm = 153.3003, GNorm = 0.3486, lr_0 = 1.9572e-04
Loss = 2.4595e-03, PNorm = 153.3075, GNorm = 0.0851, lr_0 = 1.9559e-04
Loss = 1.5062e-03, PNorm = 153.3136, GNorm = 0.0884, lr_0 = 1.9545e-04
Loss = 2.0744e-03, PNorm = 153.3173, GNorm = 0.1244, lr_0 = 1.9532e-04
Loss = 1.5861e-03, PNorm = 153.3215, GNorm = 0.0435, lr_0 = 1.9518e-04
Loss = 1.8212e-03, PNorm = 153.3245, GNorm = 0.0489, lr_0 = 1.9505e-04
Loss = 2.9991e-03, PNorm = 153.3273, GNorm = 0.2032, lr_0 = 1.9492e-04
Loss = 1.4914e-03, PNorm = 153.3312, GNorm = 0.0648, lr_0 = 1.9478e-04
Loss = 3.4808e-03, PNorm = 153.3364, GNorm = 0.1431, lr_0 = 1.9465e-04
Loss = 1.6439e-03, PNorm = 153.3387, GNorm = 0.1397, lr_0 = 1.9452e-04
Loss = 2.6774e-03, PNorm = 153.3432, GNorm = 0.1566, lr_0 = 1.9438e-04
Loss = 1.8214e-03, PNorm = 153.3457, GNorm = 0.1289, lr_0 = 1.9425e-04
Loss = 3.3606e-03, PNorm = 153.3503, GNorm = 0.1667, lr_0 = 1.9412e-04
Loss = 5.4696e-03, PNorm = 153.3559, GNorm = 0.1080, lr_0 = 1.9398e-04
Loss = 2.1274e-03, PNorm = 153.3595, GNorm = 0.2101, lr_0 = 1.9385e-04
Loss = 4.0456e-03, PNorm = 153.3663, GNorm = 0.2330, lr_0 = 1.9372e-04
Loss = 3.2479e-03, PNorm = 153.3683, GNorm = 0.2957, lr_0 = 1.9359e-04
Loss = 2.6822e-03, PNorm = 153.3717, GNorm = 0.2440, lr_0 = 1.9345e-04
Loss = 3.1241e-03, PNorm = 153.3774, GNorm = 0.0941, lr_0 = 1.9332e-04
Loss = 1.9493e-03, PNorm = 153.3802, GNorm = 0.1039, lr_0 = 1.9319e-04
Loss = 1.3605e-03, PNorm = 153.3820, GNorm = 0.1037, lr_0 = 1.9306e-04
Validation mae = 0.475193
Epoch 22
Loss = 2.4196e-03, PNorm = 153.3857, GNorm = 0.2275, lr_0 = 1.9292e-04
Loss = 2.2600e-03, PNorm = 153.3873, GNorm = 0.0467, lr_0 = 1.9279e-04
Loss = 2.4840e-03, PNorm = 153.3868, GNorm = 0.0859, lr_0 = 1.9266e-04
Loss = 2.3566e-03, PNorm = 153.3901, GNorm = 0.1645, lr_0 = 1.9253e-04
Loss = 1.9288e-03, PNorm = 153.3915, GNorm = 0.0360, lr_0 = 1.9240e-04
Loss = 1.4509e-03, PNorm = 153.3941, GNorm = 0.1674, lr_0 = 1.9226e-04
Loss = 2.0076e-03, PNorm = 153.3944, GNorm = 0.4562, lr_0 = 1.9213e-04
Loss = 2.1452e-03, PNorm = 153.3949, GNorm = 0.1630, lr_0 = 1.9200e-04
Loss = 2.5056e-03, PNorm = 153.4007, GNorm = 0.0731, lr_0 = 1.9187e-04
Loss = 1.4092e-03, PNorm = 153.4050, GNorm = 0.0615, lr_0 = 1.9174e-04
Loss = 1.6514e-03, PNorm = 153.4082, GNorm = 0.2281, lr_0 = 1.9161e-04
Loss = 2.7620e-03, PNorm = 153.4105, GNorm = 0.1646, lr_0 = 1.9148e-04
Loss = 1.4432e-03, PNorm = 153.4132, GNorm = 0.1278, lr_0 = 1.9134e-04
Loss = 1.7959e-03, PNorm = 153.4168, GNorm = 0.0773, lr_0 = 1.9121e-04
Loss = 1.7967e-03, PNorm = 153.4207, GNorm = 0.2174, lr_0 = 1.9108e-04
Loss = 1.6695e-03, PNorm = 153.4246, GNorm = 0.1948, lr_0 = 1.9095e-04
Loss = 1.8896e-03, PNorm = 153.4288, GNorm = 0.2032, lr_0 = 1.9082e-04
Loss = 1.4587e-03, PNorm = 153.4315, GNorm = 0.1856, lr_0 = 1.9069e-04
Loss = 1.3316e-03, PNorm = 153.4347, GNorm = 0.1120, lr_0 = 1.9056e-04
Loss = 1.5800e-03, PNorm = 153.4384, GNorm = 0.2571, lr_0 = 1.9043e-04
Loss = 2.1236e-03, PNorm = 153.4429, GNorm = 0.3105, lr_0 = 1.9030e-04
Loss = 1.5408e-03, PNorm = 153.4482, GNorm = 0.1323, lr_0 = 1.9017e-04
Loss = 1.5467e-03, PNorm = 153.4491, GNorm = 0.2338, lr_0 = 1.9004e-04
Loss = 1.8068e-03, PNorm = 153.4523, GNorm = 0.1921, lr_0 = 1.8991e-04
Loss = 1.3566e-03, PNorm = 153.4549, GNorm = 0.0607, lr_0 = 1.8978e-04
Loss = 1.4871e-03, PNorm = 153.4585, GNorm = 0.2454, lr_0 = 1.8965e-04
Loss = 1.2667e-03, PNorm = 153.4611, GNorm = 0.0319, lr_0 = 1.8952e-04
Loss = 2.3950e-03, PNorm = 153.4655, GNorm = 0.1669, lr_0 = 1.8939e-04
Loss = 2.4154e-03, PNorm = 153.4673, GNorm = 0.1565, lr_0 = 1.8926e-04
Loss = 2.1899e-03, PNorm = 153.4685, GNorm = 0.1088, lr_0 = 1.8913e-04
Loss = 1.9161e-03, PNorm = 153.4717, GNorm = 0.0424, lr_0 = 1.8900e-04
Loss = 1.4953e-03, PNorm = 153.4751, GNorm = 0.0825, lr_0 = 1.8887e-04
Loss = 2.1251e-03, PNorm = 153.4762, GNorm = 0.1204, lr_0 = 1.8874e-04
Loss = 1.4843e-03, PNorm = 153.4801, GNorm = 0.2149, lr_0 = 1.8861e-04
Loss = 2.0134e-03, PNorm = 153.4871, GNorm = 0.0689, lr_0 = 1.8848e-04
Loss = 2.2234e-03, PNorm = 153.4921, GNorm = 0.1067, lr_0 = 1.8835e-04
Loss = 2.3196e-03, PNorm = 153.4952, GNorm = 0.0997, lr_0 = 1.8822e-04
Loss = 1.7773e-03, PNorm = 153.4980, GNorm = 0.1084, lr_0 = 1.8809e-04
Loss = 2.0634e-03, PNorm = 153.5012, GNorm = 0.1084, lr_0 = 1.8797e-04
Loss = 1.6726e-03, PNorm = 153.5050, GNorm = 0.1933, lr_0 = 1.8784e-04
Loss = 2.5587e-03, PNorm = 153.5071, GNorm = 0.1946, lr_0 = 1.8771e-04
Loss = 2.2763e-03, PNorm = 153.5116, GNorm = 0.0888, lr_0 = 1.8758e-04
Loss = 2.2206e-03, PNorm = 153.5146, GNorm = 0.0960, lr_0 = 1.8745e-04
Loss = 1.8051e-03, PNorm = 153.5178, GNorm = 0.1136, lr_0 = 1.8732e-04
Loss = 2.5369e-03, PNorm = 153.5216, GNorm = 0.1490, lr_0 = 1.8719e-04
Loss = 2.0628e-03, PNorm = 153.5258, GNorm = 0.1956, lr_0 = 1.8707e-04
Loss = 1.7006e-03, PNorm = 153.5303, GNorm = 0.1594, lr_0 = 1.8694e-04
Loss = 3.6484e-03, PNorm = 153.5346, GNorm = 0.0855, lr_0 = 1.8681e-04
Loss = 2.4535e-03, PNorm = 153.5393, GNorm = 0.0502, lr_0 = 1.8668e-04
Loss = 1.9675e-03, PNorm = 153.5420, GNorm = 0.1117, lr_0 = 1.8655e-04
Loss = 2.2538e-03, PNorm = 153.5442, GNorm = 0.1270, lr_0 = 1.8643e-04
Loss = 1.5358e-03, PNorm = 153.5487, GNorm = 0.1017, lr_0 = 1.8630e-04
Loss = 5.5962e-03, PNorm = 153.5514, GNorm = 0.2085, lr_0 = 1.8617e-04
Loss = 2.7044e-03, PNorm = 153.5539, GNorm = 0.2073, lr_0 = 1.8604e-04
Loss = 1.4331e-03, PNorm = 153.5562, GNorm = 0.0648, lr_0 = 1.8592e-04
Loss = 1.1889e-03, PNorm = 153.5585, GNorm = 0.0813, lr_0 = 1.8579e-04
Loss = 2.8691e-03, PNorm = 153.5617, GNorm = 0.1640, lr_0 = 1.8566e-04
Loss = 4.0955e-03, PNorm = 153.5649, GNorm = 0.3067, lr_0 = 1.8553e-04
Loss = 1.2892e-03, PNorm = 153.5700, GNorm = 0.0769, lr_0 = 1.8541e-04
Loss = 1.5486e-03, PNorm = 153.5727, GNorm = 0.1631, lr_0 = 1.8528e-04
Loss = 2.7088e-03, PNorm = 153.5754, GNorm = 0.0843, lr_0 = 1.8515e-04
Loss = 1.6796e-03, PNorm = 153.5771, GNorm = 0.2294, lr_0 = 1.8503e-04
Loss = 1.4632e-03, PNorm = 153.5798, GNorm = 0.0654, lr_0 = 1.8490e-04
Loss = 1.3180e-03, PNorm = 153.5842, GNorm = 0.1073, lr_0 = 1.8477e-04
Loss = 2.5118e-03, PNorm = 153.5882, GNorm = 0.1588, lr_0 = 1.8465e-04
Loss = 5.3796e-03, PNorm = 153.5890, GNorm = 0.1739, lr_0 = 1.8452e-04
Loss = 6.0497e-03, PNorm = 153.5917, GNorm = 0.1050, lr_0 = 1.8439e-04
Loss = 2.9138e-03, PNorm = 153.5978, GNorm = 0.2188, lr_0 = 1.8427e-04
Loss = 1.9113e-03, PNorm = 153.6025, GNorm = 0.1505, lr_0 = 1.8414e-04
Loss = 2.5372e-03, PNorm = 153.6071, GNorm = 0.1631, lr_0 = 1.8401e-04
Loss = 1.7611e-03, PNorm = 153.6097, GNorm = 0.0861, lr_0 = 1.8389e-04
Loss = 2.3483e-03, PNorm = 153.6122, GNorm = 0.0606, lr_0 = 1.8376e-04
Loss = 1.6394e-03, PNorm = 153.6154, GNorm = 0.0559, lr_0 = 1.8364e-04
Loss = 1.5346e-03, PNorm = 153.6195, GNorm = 0.1389, lr_0 = 1.8351e-04
Loss = 2.1909e-03, PNorm = 153.6231, GNorm = 0.1633, lr_0 = 1.8338e-04
Loss = 1.9529e-03, PNorm = 153.6273, GNorm = 0.0622, lr_0 = 1.8326e-04
Loss = 1.5280e-03, PNorm = 153.6308, GNorm = 0.1345, lr_0 = 1.8313e-04
Loss = 1.9736e-03, PNorm = 153.6326, GNorm = 0.0977, lr_0 = 1.8301e-04
Loss = 1.9802e-03, PNorm = 153.6344, GNorm = 0.0513, lr_0 = 1.8288e-04
Loss = 1.6802e-03, PNorm = 153.6364, GNorm = 0.0585, lr_0 = 1.8276e-04
Loss = 2.7157e-03, PNorm = 153.6397, GNorm = 0.2293, lr_0 = 1.8263e-04
Loss = 1.5195e-03, PNorm = 153.6444, GNorm = 0.1146, lr_0 = 1.8251e-04
Loss = 1.6657e-03, PNorm = 153.6457, GNorm = 0.0810, lr_0 = 1.8238e-04
Loss = 1.2431e-03, PNorm = 153.6496, GNorm = 0.1890, lr_0 = 1.8226e-04
Loss = 1.7295e-03, PNorm = 153.6534, GNorm = 0.1822, lr_0 = 1.8213e-04
Loss = 1.5481e-03, PNorm = 153.6602, GNorm = 0.1493, lr_0 = 1.8201e-04
Loss = 2.7375e-03, PNorm = 153.6651, GNorm = 0.1156, lr_0 = 1.8188e-04
Loss = 1.6475e-03, PNorm = 153.6679, GNorm = 0.0393, lr_0 = 1.8176e-04
Loss = 2.5441e-03, PNorm = 153.6709, GNorm = 0.1699, lr_0 = 1.8163e-04
Loss = 1.3916e-03, PNorm = 153.6734, GNorm = 0.0788, lr_0 = 1.8151e-04
Loss = 2.9860e-03, PNorm = 153.6754, GNorm = 0.2065, lr_0 = 1.8138e-04
Loss = 2.8295e-03, PNorm = 153.6775, GNorm = 0.2273, lr_0 = 1.8126e-04
Loss = 1.3684e-03, PNorm = 153.6803, GNorm = 0.1005, lr_0 = 1.8114e-04
Loss = 2.2490e-03, PNorm = 153.6831, GNorm = 0.1021, lr_0 = 1.8101e-04
Loss = 2.2134e-03, PNorm = 153.6878, GNorm = 0.0558, lr_0 = 1.8089e-04
Loss = 1.7731e-03, PNorm = 153.6904, GNorm = 0.0785, lr_0 = 1.8076e-04
Loss = 1.2814e-03, PNorm = 153.6937, GNorm = 0.0883, lr_0 = 1.8064e-04
Loss = 1.5166e-03, PNorm = 153.6964, GNorm = 0.0772, lr_0 = 1.8052e-04
Loss = 2.6783e-03, PNorm = 153.7020, GNorm = 0.4054, lr_0 = 1.8039e-04
Loss = 3.7384e-03, PNorm = 153.7067, GNorm = 0.2413, lr_0 = 1.8027e-04
Loss = 2.5212e-03, PNorm = 153.7079, GNorm = 0.1495, lr_0 = 1.8015e-04
Loss = 1.1847e-03, PNorm = 153.7119, GNorm = 0.0630, lr_0 = 1.8002e-04
Loss = 1.9168e-03, PNorm = 153.7154, GNorm = 0.0946, lr_0 = 1.7990e-04
Loss = 2.3262e-03, PNorm = 153.7193, GNorm = 0.0643, lr_0 = 1.7978e-04
Loss = 1.7002e-03, PNorm = 153.7241, GNorm = 0.0417, lr_0 = 1.7965e-04
Loss = 2.3745e-03, PNorm = 153.7260, GNorm = 0.1428, lr_0 = 1.7953e-04
Loss = 1.3259e-03, PNorm = 153.7282, GNorm = 0.0708, lr_0 = 1.7941e-04
Loss = 1.6644e-03, PNorm = 153.7328, GNorm = 0.0487, lr_0 = 1.7928e-04
Loss = 1.1492e-03, PNorm = 153.7406, GNorm = 0.1101, lr_0 = 1.7916e-04
Loss = 1.8741e-03, PNorm = 153.7440, GNorm = 0.1538, lr_0 = 1.7904e-04
Loss = 1.3248e-03, PNorm = 153.7476, GNorm = 0.0406, lr_0 = 1.7892e-04
Loss = 3.3363e-03, PNorm = 153.7521, GNorm = 0.2516, lr_0 = 1.7879e-04
Loss = 1.6158e-03, PNorm = 153.7558, GNorm = 0.2187, lr_0 = 1.7867e-04
Loss = 3.3661e-03, PNorm = 153.7593, GNorm = 0.1912, lr_0 = 1.7855e-04
Loss = 1.7117e-03, PNorm = 153.7651, GNorm = 0.1315, lr_0 = 1.7843e-04
Loss = 1.5033e-03, PNorm = 153.7693, GNorm = 0.0591, lr_0 = 1.7830e-04
Loss = 3.4004e-03, PNorm = 153.7749, GNorm = 0.1317, lr_0 = 1.7818e-04
Loss = 1.8441e-03, PNorm = 153.7789, GNorm = 0.1305, lr_0 = 1.7806e-04
Loss = 1.7466e-03, PNorm = 153.7806, GNorm = 0.0941, lr_0 = 1.7794e-04
Loss = 3.6923e-03, PNorm = 153.7802, GNorm = 0.1352, lr_0 = 1.7782e-04
Validation mae = 0.475670
Epoch 23
Loss = 4.5146e-03, PNorm = 153.7817, GNorm = 0.1518, lr_0 = 1.7769e-04
Loss = 1.1707e-03, PNorm = 153.7852, GNorm = 0.1400, lr_0 = 1.7757e-04
Loss = 2.2385e-03, PNorm = 153.7878, GNorm = 0.1038, lr_0 = 1.7745e-04
Loss = 1.4599e-03, PNorm = 153.7901, GNorm = 0.1595, lr_0 = 1.7733e-04
Loss = 1.5702e-03, PNorm = 153.7914, GNorm = 0.1391, lr_0 = 1.7721e-04
Loss = 1.5066e-03, PNorm = 153.7942, GNorm = 0.0560, lr_0 = 1.7709e-04
Loss = 1.2463e-03, PNorm = 153.7977, GNorm = 0.0852, lr_0 = 1.7696e-04
Loss = 1.3298e-03, PNorm = 153.8010, GNorm = 0.0531, lr_0 = 1.7684e-04
Loss = 1.0061e-03, PNorm = 153.8035, GNorm = 0.0499, lr_0 = 1.7672e-04
Loss = 1.7903e-03, PNorm = 153.8075, GNorm = 0.0807, lr_0 = 1.7660e-04
Loss = 2.2940e-03, PNorm = 153.8089, GNorm = 0.1899, lr_0 = 1.7648e-04
Loss = 1.1702e-03, PNorm = 153.8111, GNorm = 0.1371, lr_0 = 1.7636e-04
Loss = 1.2919e-03, PNorm = 153.8132, GNorm = 0.0803, lr_0 = 1.7624e-04
Loss = 1.9128e-03, PNorm = 153.8164, GNorm = 0.1293, lr_0 = 1.7612e-04
Loss = 1.5289e-03, PNorm = 153.8214, GNorm = 0.2583, lr_0 = 1.7600e-04
Loss = 1.3412e-03, PNorm = 153.8245, GNorm = 0.1990, lr_0 = 1.7588e-04
Loss = 2.2239e-03, PNorm = 153.8272, GNorm = 0.0643, lr_0 = 1.7576e-04
Loss = 2.9265e-03, PNorm = 153.8295, GNorm = 0.1060, lr_0 = 1.7564e-04
Loss = 1.1931e-03, PNorm = 153.8317, GNorm = 0.1008, lr_0 = 1.7552e-04
Loss = 1.8538e-03, PNorm = 153.8334, GNorm = 0.1890, lr_0 = 1.7540e-04
Loss = 1.9374e-03, PNorm = 153.8358, GNorm = 0.1288, lr_0 = 1.7528e-04
Loss = 1.1807e-03, PNorm = 153.8365, GNorm = 0.1347, lr_0 = 1.7516e-04
Loss = 1.4480e-03, PNorm = 153.8388, GNorm = 0.0606, lr_0 = 1.7504e-04
Loss = 1.2135e-03, PNorm = 153.8422, GNorm = 0.2801, lr_0 = 1.7492e-04
Loss = 1.8612e-03, PNorm = 153.8434, GNorm = 0.1512, lr_0 = 1.7480e-04
Loss = 3.6446e-03, PNorm = 153.8471, GNorm = 0.0922, lr_0 = 1.7468e-04
Loss = 1.7030e-03, PNorm = 153.8495, GNorm = 0.1452, lr_0 = 1.7456e-04
Loss = 1.0596e-03, PNorm = 153.8541, GNorm = 0.0548, lr_0 = 1.7444e-04
Loss = 1.2489e-03, PNorm = 153.8564, GNorm = 0.0965, lr_0 = 1.7432e-04
Loss = 2.2684e-03, PNorm = 153.8577, GNorm = 0.1856, lr_0 = 1.7420e-04
Loss = 1.8146e-03, PNorm = 153.8601, GNorm = 0.0931, lr_0 = 1.7408e-04
Loss = 1.1951e-03, PNorm = 153.8620, GNorm = 0.2028, lr_0 = 1.7396e-04
Loss = 1.0009e-03, PNorm = 153.8649, GNorm = 0.0518, lr_0 = 1.7384e-04
Loss = 1.6226e-03, PNorm = 153.8665, GNorm = 0.2160, lr_0 = 1.7372e-04
Loss = 1.7066e-03, PNorm = 153.8690, GNorm = 0.1625, lr_0 = 1.7360e-04
Loss = 2.4433e-03, PNorm = 153.8705, GNorm = 0.1083, lr_0 = 1.7348e-04
Loss = 1.3386e-03, PNorm = 153.8732, GNorm = 0.0702, lr_0 = 1.7336e-04
Loss = 2.7737e-03, PNorm = 153.8741, GNorm = 0.1052, lr_0 = 1.7325e-04
Loss = 1.6656e-03, PNorm = 153.8769, GNorm = 0.0884, lr_0 = 1.7313e-04
Loss = 2.1603e-03, PNorm = 153.8805, GNorm = 0.2185, lr_0 = 1.7301e-04
Loss = 1.9501e-03, PNorm = 153.8828, GNorm = 0.3227, lr_0 = 1.7289e-04
Loss = 1.9917e-03, PNorm = 153.8847, GNorm = 0.2090, lr_0 = 1.7277e-04
Loss = 1.5913e-03, PNorm = 153.8879, GNorm = 0.0381, lr_0 = 1.7265e-04
Loss = 1.0525e-03, PNorm = 153.8911, GNorm = 0.0372, lr_0 = 1.7253e-04
Loss = 2.5745e-03, PNorm = 153.8940, GNorm = 0.1261, lr_0 = 1.7242e-04
Loss = 1.1037e-03, PNorm = 153.8973, GNorm = 0.1567, lr_0 = 1.7230e-04
Loss = 1.0976e-03, PNorm = 153.8992, GNorm = 0.0345, lr_0 = 1.7218e-04
Loss = 1.7508e-03, PNorm = 153.9026, GNorm = 0.0606, lr_0 = 1.7206e-04
Loss = 1.9688e-03, PNorm = 153.9079, GNorm = 0.2211, lr_0 = 1.7194e-04
Loss = 1.3887e-03, PNorm = 153.9115, GNorm = 0.0479, lr_0 = 1.7183e-04
Loss = 1.5490e-03, PNorm = 153.9136, GNorm = 0.1361, lr_0 = 1.7171e-04
Loss = 1.1789e-03, PNorm = 153.9159, GNorm = 0.0728, lr_0 = 1.7159e-04
Loss = 2.7546e-03, PNorm = 153.9194, GNorm = 0.1229, lr_0 = 1.7147e-04
Loss = 1.5801e-03, PNorm = 153.9197, GNorm = 0.1493, lr_0 = 1.7136e-04
Loss = 1.7441e-03, PNorm = 153.9231, GNorm = 0.0469, lr_0 = 1.7124e-04
Loss = 1.2999e-03, PNorm = 153.9266, GNorm = 0.0653, lr_0 = 1.7112e-04
Loss = 2.9678e-03, PNorm = 153.9318, GNorm = 0.2248, lr_0 = 1.7100e-04
Loss = 1.0933e-03, PNorm = 153.9352, GNorm = 0.1013, lr_0 = 1.7089e-04
Loss = 1.3710e-03, PNorm = 153.9371, GNorm = 0.0976, lr_0 = 1.7077e-04
Loss = 1.5896e-03, PNorm = 153.9394, GNorm = 0.0909, lr_0 = 1.7065e-04
Loss = 1.1936e-03, PNorm = 153.9419, GNorm = 0.0349, lr_0 = 1.7054e-04
Loss = 1.1765e-03, PNorm = 153.9465, GNorm = 0.1802, lr_0 = 1.7042e-04
Loss = 1.5206e-03, PNorm = 153.9495, GNorm = 0.0993, lr_0 = 1.7030e-04
Loss = 1.5733e-03, PNorm = 153.9524, GNorm = 0.0628, lr_0 = 1.7019e-04
Loss = 1.1840e-03, PNorm = 153.9547, GNorm = 0.0713, lr_0 = 1.7007e-04
Loss = 1.2297e-03, PNorm = 153.9573, GNorm = 0.1290, lr_0 = 1.6995e-04
Loss = 1.6155e-03, PNorm = 153.9593, GNorm = 0.0451, lr_0 = 1.6984e-04
Loss = 1.0737e-03, PNorm = 153.9628, GNorm = 0.1375, lr_0 = 1.6972e-04
Loss = 1.1420e-03, PNorm = 153.9658, GNorm = 0.0812, lr_0 = 1.6960e-04
Loss = 2.5034e-03, PNorm = 153.9685, GNorm = 0.2774, lr_0 = 1.6949e-04
Loss = 1.4941e-03, PNorm = 153.9722, GNorm = 0.0962, lr_0 = 1.6937e-04
Loss = 1.5679e-03, PNorm = 153.9740, GNorm = 0.0340, lr_0 = 1.6926e-04
Loss = 1.2976e-03, PNorm = 153.9778, GNorm = 0.2009, lr_0 = 1.6914e-04
Loss = 1.7599e-03, PNorm = 153.9804, GNorm = 0.1627, lr_0 = 1.6902e-04
Loss = 1.4204e-03, PNorm = 153.9840, GNorm = 0.0855, lr_0 = 1.6891e-04
Loss = 2.0302e-03, PNorm = 153.9868, GNorm = 0.0953, lr_0 = 1.6879e-04
Loss = 2.2239e-03, PNorm = 153.9908, GNorm = 0.2446, lr_0 = 1.6868e-04
Loss = 2.7457e-03, PNorm = 153.9949, GNorm = 0.1139, lr_0 = 1.6856e-04
Loss = 1.2713e-03, PNorm = 153.9988, GNorm = 0.1681, lr_0 = 1.6845e-04
Loss = 1.6991e-03, PNorm = 154.0016, GNorm = 0.0971, lr_0 = 1.6833e-04
Loss = 1.5783e-03, PNorm = 154.0040, GNorm = 0.0780, lr_0 = 1.6821e-04
Loss = 1.3219e-03, PNorm = 154.0075, GNorm = 0.1713, lr_0 = 1.6810e-04
Loss = 2.3067e-03, PNorm = 154.0130, GNorm = 0.2898, lr_0 = 1.6798e-04
Loss = 2.4793e-03, PNorm = 154.0166, GNorm = 0.0530, lr_0 = 1.6787e-04
Loss = 1.3812e-03, PNorm = 154.0206, GNorm = 0.0442, lr_0 = 1.6775e-04
Loss = 2.8119e-03, PNorm = 154.0217, GNorm = 0.1226, lr_0 = 1.6764e-04
Loss = 1.9715e-03, PNorm = 154.0225, GNorm = 0.1656, lr_0 = 1.6752e-04
Loss = 1.7529e-03, PNorm = 154.0239, GNorm = 0.1216, lr_0 = 1.6741e-04
Loss = 3.0951e-03, PNorm = 154.0278, GNorm = 0.1138, lr_0 = 1.6729e-04
Loss = 2.4768e-03, PNorm = 154.0310, GNorm = 0.2777, lr_0 = 1.6718e-04
Loss = 1.7697e-03, PNorm = 154.0349, GNorm = 0.1889, lr_0 = 1.6707e-04
Loss = 3.1985e-03, PNorm = 154.0372, GNorm = 0.0518, lr_0 = 1.6695e-04
Loss = 1.3323e-03, PNorm = 154.0390, GNorm = 0.2874, lr_0 = 1.6684e-04
Loss = 2.8718e-03, PNorm = 154.0441, GNorm = 0.1027, lr_0 = 1.6672e-04
Loss = 2.1275e-03, PNorm = 154.0480, GNorm = 0.0559, lr_0 = 1.6661e-04
Loss = 3.6333e-03, PNorm = 154.0515, GNorm = 0.0622, lr_0 = 1.6649e-04
Loss = 1.1217e-03, PNorm = 154.0562, GNorm = 0.1191, lr_0 = 1.6638e-04
Loss = 1.5378e-03, PNorm = 154.0594, GNorm = 0.0474, lr_0 = 1.6627e-04
Loss = 4.9786e-03, PNorm = 154.0620, GNorm = 0.0886, lr_0 = 1.6615e-04
Loss = 1.4088e-03, PNorm = 154.0662, GNorm = 0.2092, lr_0 = 1.6604e-04
Loss = 1.0414e-03, PNorm = 154.0684, GNorm = 0.0602, lr_0 = 1.6592e-04
Loss = 1.1743e-03, PNorm = 154.0719, GNorm = 0.1302, lr_0 = 1.6581e-04
Loss = 1.8905e-03, PNorm = 154.0744, GNorm = 0.2214, lr_0 = 1.6570e-04
Loss = 1.5889e-03, PNorm = 154.0779, GNorm = 0.1310, lr_0 = 1.6558e-04
Loss = 2.1427e-03, PNorm = 154.0797, GNorm = 0.1193, lr_0 = 1.6547e-04
Loss = 2.5415e-03, PNorm = 154.0803, GNorm = 0.0839, lr_0 = 1.6536e-04
Loss = 1.7443e-03, PNorm = 154.0834, GNorm = 0.0696, lr_0 = 1.6524e-04
Loss = 2.7934e-03, PNorm = 154.0870, GNorm = 0.1944, lr_0 = 1.6513e-04
Loss = 1.7273e-03, PNorm = 154.0874, GNorm = 0.1959, lr_0 = 1.6502e-04
Loss = 1.3320e-03, PNorm = 154.0882, GNorm = 0.1039, lr_0 = 1.6490e-04
Loss = 2.9330e-03, PNorm = 154.0910, GNorm = 0.1206, lr_0 = 1.6479e-04
Loss = 1.2453e-03, PNorm = 154.0947, GNorm = 0.1279, lr_0 = 1.6468e-04
Loss = 2.7384e-03, PNorm = 154.0974, GNorm = 0.1037, lr_0 = 1.6457e-04
Loss = 2.9190e-03, PNorm = 154.0988, GNorm = 0.1730, lr_0 = 1.6445e-04
Loss = 3.9326e-03, PNorm = 154.1022, GNorm = 0.3668, lr_0 = 1.6434e-04
Loss = 1.3884e-03, PNorm = 154.1072, GNorm = 0.1221, lr_0 = 1.6423e-04
Loss = 3.2760e-03, PNorm = 154.1117, GNorm = 0.1132, lr_0 = 1.6412e-04
Loss = 1.1761e-03, PNorm = 154.1160, GNorm = 0.1258, lr_0 = 1.6400e-04
Loss = 1.1739e-03, PNorm = 154.1200, GNorm = 0.1030, lr_0 = 1.6389e-04
Loss = 1.3456e-03, PNorm = 154.1225, GNorm = 0.0714, lr_0 = 1.6378e-04
Validation mae = 0.474940
Epoch 24
Loss = 1.4886e-03, PNorm = 154.1237, GNorm = 0.2002, lr_0 = 1.6367e-04
Loss = 1.5152e-03, PNorm = 154.1270, GNorm = 0.1124, lr_0 = 1.6355e-04
Loss = 9.7490e-04, PNorm = 154.1294, GNorm = 0.0592, lr_0 = 1.6344e-04
Loss = 1.6739e-03, PNorm = 154.1322, GNorm = 0.0329, lr_0 = 1.6333e-04
Loss = 2.0099e-03, PNorm = 154.1351, GNorm = 0.1705, lr_0 = 1.6322e-04
Loss = 9.5391e-04, PNorm = 154.1365, GNorm = 0.0580, lr_0 = 1.6311e-04
Loss = 1.1029e-03, PNorm = 154.1381, GNorm = 0.0784, lr_0 = 1.6299e-04
Loss = 1.0015e-03, PNorm = 154.1398, GNorm = 0.1872, lr_0 = 1.6288e-04
Loss = 1.8055e-03, PNorm = 154.1404, GNorm = 0.3567, lr_0 = 1.6277e-04
Loss = 3.6533e-03, PNorm = 154.1434, GNorm = 0.0395, lr_0 = 1.6266e-04
Loss = 9.7622e-04, PNorm = 154.1461, GNorm = 0.2231, lr_0 = 1.6255e-04
Loss = 1.7828e-03, PNorm = 154.1495, GNorm = 0.0673, lr_0 = 1.6244e-04
Loss = 1.0538e-03, PNorm = 154.1526, GNorm = 0.2473, lr_0 = 1.6233e-04
Loss = 2.1140e-03, PNorm = 154.1547, GNorm = 0.0570, lr_0 = 1.6221e-04
Loss = 1.7980e-03, PNorm = 154.1556, GNorm = 0.1377, lr_0 = 1.6210e-04
Loss = 9.6739e-04, PNorm = 154.1557, GNorm = 0.0863, lr_0 = 1.6199e-04
Loss = 1.4914e-03, PNorm = 154.1569, GNorm = 0.1432, lr_0 = 1.6188e-04
Loss = 9.6763e-04, PNorm = 154.1572, GNorm = 0.1020, lr_0 = 1.6177e-04
Loss = 1.6289e-03, PNorm = 154.1597, GNorm = 0.0429, lr_0 = 1.6166e-04
Loss = 1.1560e-03, PNorm = 154.1619, GNorm = 0.0681, lr_0 = 1.6155e-04
Loss = 1.8179e-03, PNorm = 154.1644, GNorm = 0.1514, lr_0 = 1.6144e-04
Loss = 1.9298e-03, PNorm = 154.1670, GNorm = 0.0825, lr_0 = 1.6133e-04
Loss = 1.6331e-03, PNorm = 154.1707, GNorm = 0.1281, lr_0 = 1.6122e-04
Loss = 1.8522e-03, PNorm = 154.1747, GNorm = 0.0534, lr_0 = 1.6111e-04
Loss = 1.2500e-03, PNorm = 154.1771, GNorm = 0.0986, lr_0 = 1.6100e-04
Loss = 1.5806e-03, PNorm = 154.1805, GNorm = 0.0825, lr_0 = 1.6089e-04
Loss = 9.7830e-04, PNorm = 154.1825, GNorm = 0.0971, lr_0 = 1.6078e-04
Loss = 1.5371e-03, PNorm = 154.1836, GNorm = 0.0714, lr_0 = 1.6067e-04
Loss = 2.5023e-03, PNorm = 154.1846, GNorm = 0.1289, lr_0 = 1.6056e-04
Loss = 1.2509e-03, PNorm = 154.1874, GNorm = 0.1040, lr_0 = 1.6045e-04
Loss = 1.1817e-03, PNorm = 154.1884, GNorm = 0.0924, lr_0 = 1.6034e-04
Loss = 1.3103e-03, PNorm = 154.1918, GNorm = 0.2487, lr_0 = 1.6023e-04
Loss = 1.1488e-03, PNorm = 154.1943, GNorm = 0.1958, lr_0 = 1.6012e-04
Loss = 1.1699e-03, PNorm = 154.1977, GNorm = 0.0448, lr_0 = 1.6001e-04
Loss = 1.7906e-03, PNorm = 154.2015, GNorm = 0.0533, lr_0 = 1.5990e-04
Loss = 1.3256e-03, PNorm = 154.2028, GNorm = 0.0592, lr_0 = 1.5979e-04
Loss = 1.2077e-03, PNorm = 154.2053, GNorm = 0.0500, lr_0 = 1.5968e-04
Loss = 1.0785e-03, PNorm = 154.2078, GNorm = 0.0858, lr_0 = 1.5957e-04
Loss = 1.1354e-03, PNorm = 154.2095, GNorm = 0.1159, lr_0 = 1.5946e-04
Loss = 1.2380e-03, PNorm = 154.2129, GNorm = 0.0490, lr_0 = 1.5935e-04
Loss = 9.1196e-04, PNorm = 154.2144, GNorm = 0.1188, lr_0 = 1.5924e-04
Loss = 1.0539e-03, PNorm = 154.2153, GNorm = 0.0810, lr_0 = 1.5913e-04
Loss = 1.2641e-03, PNorm = 154.2181, GNorm = 0.0685, lr_0 = 1.5902e-04
Loss = 1.6311e-03, PNorm = 154.2211, GNorm = 0.0880, lr_0 = 1.5891e-04
Loss = 1.7570e-03, PNorm = 154.2234, GNorm = 0.1018, lr_0 = 1.5880e-04
Loss = 3.3353e-03, PNorm = 154.2244, GNorm = 0.0461, lr_0 = 1.5870e-04
Loss = 7.8314e-04, PNorm = 154.2271, GNorm = 0.1089, lr_0 = 1.5859e-04
Loss = 1.9989e-03, PNorm = 154.2309, GNorm = 0.2715, lr_0 = 1.5848e-04
Loss = 1.9035e-03, PNorm = 154.2335, GNorm = 0.1268, lr_0 = 1.5837e-04
Loss = 1.1826e-03, PNorm = 154.2359, GNorm = 0.0484, lr_0 = 1.5826e-04
Loss = 1.2641e-03, PNorm = 154.2377, GNorm = 0.0862, lr_0 = 1.5815e-04
Loss = 1.3051e-03, PNorm = 154.2430, GNorm = 0.1250, lr_0 = 1.5804e-04
Loss = 1.4106e-03, PNorm = 154.2452, GNorm = 0.0582, lr_0 = 1.5794e-04
Loss = 1.3527e-03, PNorm = 154.2470, GNorm = 0.1430, lr_0 = 1.5783e-04
Loss = 2.6167e-03, PNorm = 154.2477, GNorm = 0.1088, lr_0 = 1.5772e-04
Loss = 3.0788e-03, PNorm = 154.2474, GNorm = 0.1019, lr_0 = 1.5761e-04
Loss = 2.2456e-03, PNorm = 154.2500, GNorm = 0.1467, lr_0 = 1.5750e-04
Loss = 9.8302e-04, PNorm = 154.2517, GNorm = 0.1654, lr_0 = 1.5740e-04
Loss = 1.2466e-03, PNorm = 154.2534, GNorm = 0.0895, lr_0 = 1.5729e-04
Loss = 1.1859e-03, PNorm = 154.2553, GNorm = 0.1007, lr_0 = 1.5718e-04
Loss = 1.4086e-03, PNorm = 154.2587, GNorm = 0.0524, lr_0 = 1.5707e-04
Loss = 1.1027e-03, PNorm = 154.2607, GNorm = 0.0510, lr_0 = 1.5697e-04
Loss = 9.6909e-04, PNorm = 154.2620, GNorm = 0.0697, lr_0 = 1.5686e-04
Loss = 1.1228e-03, PNorm = 154.2647, GNorm = 0.1377, lr_0 = 1.5675e-04
Loss = 9.6045e-04, PNorm = 154.2680, GNorm = 0.1157, lr_0 = 1.5664e-04
Loss = 1.7196e-03, PNorm = 154.2711, GNorm = 0.4209, lr_0 = 1.5654e-04
Loss = 4.1465e-03, PNorm = 154.2735, GNorm = 0.1293, lr_0 = 1.5643e-04
Loss = 2.9115e-03, PNorm = 154.2768, GNorm = 0.3199, lr_0 = 1.5632e-04
Loss = 1.1788e-03, PNorm = 154.2769, GNorm = 0.0640, lr_0 = 1.5621e-04
Loss = 1.8543e-03, PNorm = 154.2792, GNorm = 0.0676, lr_0 = 1.5611e-04
Loss = 1.8331e-03, PNorm = 154.2819, GNorm = 0.0928, lr_0 = 1.5600e-04
Loss = 1.8182e-03, PNorm = 154.2857, GNorm = 0.3771, lr_0 = 1.5589e-04
Loss = 9.8510e-04, PNorm = 154.2881, GNorm = 0.0502, lr_0 = 1.5579e-04
Loss = 2.5823e-03, PNorm = 154.2919, GNorm = 0.0722, lr_0 = 1.5568e-04
Loss = 1.7712e-03, PNorm = 154.2937, GNorm = 0.0935, lr_0 = 1.5557e-04
Loss = 1.2760e-03, PNorm = 154.2974, GNorm = 0.0690, lr_0 = 1.5547e-04
Loss = 1.0505e-03, PNorm = 154.3015, GNorm = 0.0653, lr_0 = 1.5536e-04
Loss = 3.0279e-03, PNorm = 154.3049, GNorm = 0.1444, lr_0 = 1.5525e-04
Loss = 3.6902e-03, PNorm = 154.3092, GNorm = 0.1903, lr_0 = 1.5515e-04
Loss = 2.2886e-03, PNorm = 154.3118, GNorm = 0.2510, lr_0 = 1.5504e-04
Loss = 2.0512e-03, PNorm = 154.3129, GNorm = 0.1047, lr_0 = 1.5493e-04
Loss = 1.4185e-03, PNorm = 154.3137, GNorm = 0.1196, lr_0 = 1.5483e-04
Loss = 1.1057e-03, PNorm = 154.3179, GNorm = 0.1643, lr_0 = 1.5472e-04
Loss = 2.5149e-03, PNorm = 154.3211, GNorm = 0.1949, lr_0 = 1.5462e-04
Loss = 1.3694e-03, PNorm = 154.3239, GNorm = 0.1821, lr_0 = 1.5451e-04
Loss = 1.4674e-03, PNorm = 154.3266, GNorm = 0.0596, lr_0 = 1.5440e-04
Loss = 3.2666e-03, PNorm = 154.3301, GNorm = 0.0514, lr_0 = 1.5430e-04
Loss = 2.0140e-03, PNorm = 154.3327, GNorm = 0.1506, lr_0 = 1.5419e-04
Loss = 8.9775e-04, PNorm = 154.3342, GNorm = 0.0617, lr_0 = 1.5409e-04
Loss = 2.0593e-03, PNorm = 154.3379, GNorm = 0.0894, lr_0 = 1.5398e-04
Loss = 1.5921e-03, PNorm = 154.3412, GNorm = 0.1095, lr_0 = 1.5388e-04
Loss = 3.3215e-03, PNorm = 154.3441, GNorm = 0.0830, lr_0 = 1.5377e-04
Loss = 2.3878e-03, PNorm = 154.3468, GNorm = 0.0906, lr_0 = 1.5367e-04
Loss = 1.3009e-03, PNorm = 154.3515, GNorm = 0.0789, lr_0 = 1.5356e-04
Loss = 2.1328e-03, PNorm = 154.3546, GNorm = 0.0909, lr_0 = 1.5346e-04
Loss = 7.6115e-04, PNorm = 154.3567, GNorm = 0.1498, lr_0 = 1.5335e-04
Loss = 1.1183e-03, PNorm = 154.3585, GNorm = 0.0635, lr_0 = 1.5325e-04
Loss = 1.6798e-03, PNorm = 154.3570, GNorm = 0.0856, lr_0 = 1.5314e-04
Loss = 1.6812e-03, PNorm = 154.3579, GNorm = 0.0533, lr_0 = 1.5304e-04
Loss = 1.1178e-03, PNorm = 154.3590, GNorm = 0.0427, lr_0 = 1.5293e-04
Loss = 8.0396e-04, PNorm = 154.3584, GNorm = 0.0360, lr_0 = 1.5283e-04
Loss = 1.2192e-03, PNorm = 154.3595, GNorm = 0.1854, lr_0 = 1.5272e-04
Loss = 1.0077e-03, PNorm = 154.3632, GNorm = 0.0475, lr_0 = 1.5262e-04
Loss = 2.0961e-03, PNorm = 154.3666, GNorm = 0.0427, lr_0 = 1.5251e-04
Loss = 1.1749e-03, PNorm = 154.3712, GNorm = 0.1766, lr_0 = 1.5241e-04
Loss = 1.9882e-03, PNorm = 154.3739, GNorm = 0.1144, lr_0 = 1.5230e-04
Loss = 3.4279e-03, PNorm = 154.3761, GNorm = 0.1376, lr_0 = 1.5220e-04
Loss = 1.7747e-03, PNorm = 154.3785, GNorm = 0.0853, lr_0 = 1.5209e-04
Loss = 9.6849e-04, PNorm = 154.3800, GNorm = 0.0643, lr_0 = 1.5199e-04
Loss = 1.3902e-03, PNorm = 154.3830, GNorm = 0.1236, lr_0 = 1.5189e-04
Loss = 9.2832e-04, PNorm = 154.3845, GNorm = 0.0448, lr_0 = 1.5178e-04
Loss = 1.8704e-03, PNorm = 154.3870, GNorm = 0.0795, lr_0 = 1.5168e-04
Loss = 1.3343e-03, PNorm = 154.3899, GNorm = 0.3854, lr_0 = 1.5157e-04
Loss = 3.0090e-03, PNorm = 154.3918, GNorm = 0.0873, lr_0 = 1.5147e-04
Loss = 2.3659e-03, PNorm = 154.3956, GNorm = 0.0591, lr_0 = 1.5137e-04
Loss = 1.8139e-03, PNorm = 154.3983, GNorm = 0.0450, lr_0 = 1.5126e-04
Loss = 2.4719e-03, PNorm = 154.3999, GNorm = 0.1896, lr_0 = 1.5116e-04
Loss = 1.8854e-03, PNorm = 154.4022, GNorm = 0.0360, lr_0 = 1.5106e-04
Loss = 2.1107e-03, PNorm = 154.4054, GNorm = 0.1930, lr_0 = 1.5095e-04
Loss = 3.8485e-03, PNorm = 154.4093, GNorm = 0.1160, lr_0 = 1.5085e-04
Validation mae = 0.476104
Epoch 25
Loss = 9.9442e-04, PNorm = 154.4103, GNorm = 0.0546, lr_0 = 1.5075e-04
Loss = 8.8750e-04, PNorm = 154.4121, GNorm = 0.0543, lr_0 = 1.5064e-04
Loss = 8.6976e-04, PNorm = 154.4137, GNorm = 0.1081, lr_0 = 1.5054e-04
Loss = 1.4509e-03, PNorm = 154.4167, GNorm = 0.0444, lr_0 = 1.5044e-04
Loss = 9.2031e-04, PNorm = 154.4195, GNorm = 0.0823, lr_0 = 1.5033e-04
Loss = 1.7836e-03, PNorm = 154.4218, GNorm = 0.0718, lr_0 = 1.5023e-04
Loss = 2.3066e-03, PNorm = 154.4239, GNorm = 0.1395, lr_0 = 1.5013e-04
Loss = 1.2833e-03, PNorm = 154.4244, GNorm = 0.0667, lr_0 = 1.5002e-04
Loss = 1.5379e-03, PNorm = 154.4261, GNorm = 0.2352, lr_0 = 1.4992e-04
Loss = 1.2114e-03, PNorm = 154.4255, GNorm = 0.0719, lr_0 = 1.4982e-04
Loss = 1.0192e-03, PNorm = 154.4263, GNorm = 0.0937, lr_0 = 1.4972e-04
Loss = 1.0284e-03, PNorm = 154.4272, GNorm = 0.0580, lr_0 = 1.4961e-04
Loss = 1.2489e-03, PNorm = 154.4307, GNorm = 0.0799, lr_0 = 1.4951e-04
Loss = 7.9754e-04, PNorm = 154.4323, GNorm = 0.1057, lr_0 = 1.4941e-04
Loss = 3.2337e-03, PNorm = 154.4347, GNorm = 0.0794, lr_0 = 1.4931e-04
Loss = 1.3197e-03, PNorm = 154.4374, GNorm = 0.0450, lr_0 = 1.4920e-04
Loss = 3.4232e-03, PNorm = 154.4392, GNorm = 0.1130, lr_0 = 1.4910e-04
Loss = 1.6836e-03, PNorm = 154.4399, GNorm = 0.0801, lr_0 = 1.4900e-04
Loss = 1.0727e-03, PNorm = 154.4419, GNorm = 0.0318, lr_0 = 1.4890e-04
Loss = 2.3100e-03, PNorm = 154.4447, GNorm = 0.2646, lr_0 = 1.4880e-04
Loss = 2.0120e-03, PNorm = 154.4484, GNorm = 0.1780, lr_0 = 1.4869e-04
Loss = 7.8261e-04, PNorm = 154.4503, GNorm = 0.0628, lr_0 = 1.4859e-04
Loss = 2.3341e-03, PNorm = 154.4521, GNorm = 0.1541, lr_0 = 1.4849e-04
Loss = 1.0673e-03, PNorm = 154.4533, GNorm = 0.0611, lr_0 = 1.4839e-04
Loss = 1.6305e-03, PNorm = 154.4531, GNorm = 0.1566, lr_0 = 1.4829e-04
Loss = 1.0741e-03, PNorm = 154.4554, GNorm = 0.2422, lr_0 = 1.4818e-04
Loss = 1.9543e-03, PNorm = 154.4560, GNorm = 0.1842, lr_0 = 1.4808e-04
Loss = 7.7607e-04, PNorm = 154.4564, GNorm = 0.0509, lr_0 = 1.4798e-04
Loss = 1.7383e-03, PNorm = 154.4572, GNorm = 0.0613, lr_0 = 1.4788e-04
Loss = 8.9336e-04, PNorm = 154.4599, GNorm = 0.1357, lr_0 = 1.4778e-04
Loss = 2.1175e-03, PNorm = 154.4625, GNorm = 0.0466, lr_0 = 1.4768e-04
Loss = 1.0393e-03, PNorm = 154.4649, GNorm = 0.0918, lr_0 = 1.4758e-04
Loss = 1.9313e-03, PNorm = 154.4678, GNorm = 0.1862, lr_0 = 1.4748e-04
Loss = 1.2983e-03, PNorm = 154.4679, GNorm = 0.0690, lr_0 = 1.4737e-04
Loss = 9.4918e-04, PNorm = 154.4687, GNorm = 0.1242, lr_0 = 1.4727e-04
Loss = 1.1930e-03, PNorm = 154.4706, GNorm = 0.1748, lr_0 = 1.4717e-04
Loss = 1.2225e-03, PNorm = 154.4741, GNorm = 0.0493, lr_0 = 1.4707e-04
Loss = 9.6305e-04, PNorm = 154.4777, GNorm = 0.1095, lr_0 = 1.4697e-04
Loss = 1.4938e-03, PNorm = 154.4786, GNorm = 0.1448, lr_0 = 1.4687e-04
Loss = 1.3577e-03, PNorm = 154.4790, GNorm = 0.0726, lr_0 = 1.4677e-04
Loss = 2.7071e-03, PNorm = 154.4790, GNorm = 0.3087, lr_0 = 1.4667e-04
Loss = 8.9876e-04, PNorm = 154.4810, GNorm = 0.0936, lr_0 = 1.4657e-04
Loss = 1.0257e-03, PNorm = 154.4830, GNorm = 0.1483, lr_0 = 1.4647e-04
Loss = 9.5571e-04, PNorm = 154.4864, GNorm = 0.0946, lr_0 = 1.4637e-04
Loss = 1.4087e-03, PNorm = 154.4904, GNorm = 0.2071, lr_0 = 1.4627e-04
Loss = 1.1085e-03, PNorm = 154.4932, GNorm = 0.1438, lr_0 = 1.4617e-04
Loss = 1.0825e-03, PNorm = 154.4966, GNorm = 0.1851, lr_0 = 1.4607e-04
Loss = 1.0427e-03, PNorm = 154.4966, GNorm = 0.1470, lr_0 = 1.4597e-04
Loss = 1.9531e-03, PNorm = 154.4986, GNorm = 0.0800, lr_0 = 1.4587e-04
Loss = 7.9917e-04, PNorm = 154.5001, GNorm = 0.0326, lr_0 = 1.4577e-04
Loss = 1.8732e-03, PNorm = 154.5010, GNorm = 0.1147, lr_0 = 1.4567e-04
Loss = 1.0281e-03, PNorm = 154.5024, GNorm = 0.1213, lr_0 = 1.4557e-04
Loss = 2.9618e-03, PNorm = 154.5042, GNorm = 0.0637, lr_0 = 1.4547e-04
Loss = 9.2005e-04, PNorm = 154.5065, GNorm = 0.1153, lr_0 = 1.4537e-04
Loss = 2.7464e-03, PNorm = 154.5091, GNorm = 0.1313, lr_0 = 1.4527e-04
Loss = 1.6520e-03, PNorm = 154.5110, GNorm = 0.2713, lr_0 = 1.4517e-04
Loss = 2.3165e-03, PNorm = 154.5149, GNorm = 0.1184, lr_0 = 1.4507e-04
Loss = 1.7645e-03, PNorm = 154.5180, GNorm = 0.1208, lr_0 = 1.4497e-04
Loss = 2.6087e-03, PNorm = 154.5199, GNorm = 0.0365, lr_0 = 1.4487e-04
Loss = 1.3929e-03, PNorm = 154.5230, GNorm = 0.0384, lr_0 = 1.4477e-04
Loss = 1.9581e-03, PNorm = 154.5241, GNorm = 0.4891, lr_0 = 1.4467e-04
Loss = 1.1459e-03, PNorm = 154.5269, GNorm = 0.0255, lr_0 = 1.4457e-04
Loss = 1.2404e-03, PNorm = 154.5307, GNorm = 0.0247, lr_0 = 1.4447e-04
Loss = 1.0167e-03, PNorm = 154.5328, GNorm = 0.0348, lr_0 = 1.4438e-04
Loss = 2.1525e-03, PNorm = 154.5351, GNorm = 0.1789, lr_0 = 1.4428e-04
Loss = 1.6109e-03, PNorm = 154.5373, GNorm = 0.0312, lr_0 = 1.4418e-04
Loss = 1.7045e-03, PNorm = 154.5393, GNorm = 0.1230, lr_0 = 1.4408e-04
Loss = 2.9007e-03, PNorm = 154.5407, GNorm = 0.0734, lr_0 = 1.4398e-04
Loss = 2.2240e-03, PNorm = 154.5415, GNorm = 0.1331, lr_0 = 1.4388e-04
Loss = 1.2065e-03, PNorm = 154.5432, GNorm = 0.1159, lr_0 = 1.4378e-04
Loss = 1.3540e-03, PNorm = 154.5444, GNorm = 0.0661, lr_0 = 1.4368e-04
Loss = 2.4564e-03, PNorm = 154.5469, GNorm = 0.1398, lr_0 = 1.4359e-04
Loss = 1.3455e-03, PNorm = 154.5500, GNorm = 0.0720, lr_0 = 1.4349e-04
Loss = 2.4012e-03, PNorm = 154.5525, GNorm = 0.0929, lr_0 = 1.4339e-04
Loss = 2.2825e-03, PNorm = 154.5556, GNorm = 0.0961, lr_0 = 1.4329e-04
Loss = 2.8554e-03, PNorm = 154.5578, GNorm = 0.4115, lr_0 = 1.4319e-04
Loss = 2.1736e-03, PNorm = 154.5577, GNorm = 0.1708, lr_0 = 1.4310e-04
Loss = 4.1044e-03, PNorm = 154.5606, GNorm = 0.0802, lr_0 = 1.4300e-04
Loss = 1.1316e-03, PNorm = 154.5645, GNorm = 0.0747, lr_0 = 1.4290e-04
Loss = 9.6588e-04, PNorm = 154.5684, GNorm = 0.0849, lr_0 = 1.4280e-04
Loss = 9.1883e-04, PNorm = 154.5723, GNorm = 0.0775, lr_0 = 1.4270e-04
Loss = 1.0254e-03, PNorm = 154.5748, GNorm = 0.1245, lr_0 = 1.4261e-04
Loss = 9.0316e-04, PNorm = 154.5776, GNorm = 0.0369, lr_0 = 1.4251e-04
Loss = 1.6544e-03, PNorm = 154.5796, GNorm = 0.1866, lr_0 = 1.4241e-04
Loss = 1.1958e-03, PNorm = 154.5791, GNorm = 0.1464, lr_0 = 1.4231e-04
Loss = 8.0273e-04, PNorm = 154.5803, GNorm = 0.1216, lr_0 = 1.4222e-04
Loss = 1.2610e-03, PNorm = 154.5813, GNorm = 0.1634, lr_0 = 1.4212e-04
Loss = 8.7505e-04, PNorm = 154.5829, GNorm = 0.0796, lr_0 = 1.4202e-04
Loss = 2.1259e-03, PNorm = 154.5864, GNorm = 0.1300, lr_0 = 1.4192e-04
Loss = 1.0455e-03, PNorm = 154.5874, GNorm = 0.1411, lr_0 = 1.4183e-04
Loss = 1.6349e-03, PNorm = 154.5884, GNorm = 0.2296, lr_0 = 1.4173e-04
Loss = 1.4016e-03, PNorm = 154.5898, GNorm = 0.0941, lr_0 = 1.4163e-04
Loss = 1.7846e-03, PNorm = 154.5908, GNorm = 0.0296, lr_0 = 1.4153e-04
Loss = 1.0699e-03, PNorm = 154.5932, GNorm = 0.0720, lr_0 = 1.4144e-04
Loss = 2.0185e-03, PNorm = 154.5958, GNorm = 0.2555, lr_0 = 1.4134e-04
Loss = 1.1164e-03, PNorm = 154.5995, GNorm = 0.0487, lr_0 = 1.4124e-04
Loss = 1.6681e-03, PNorm = 154.6030, GNorm = 0.1589, lr_0 = 1.4115e-04
Loss = 1.0901e-03, PNorm = 154.6053, GNorm = 0.0691, lr_0 = 1.4105e-04
Loss = 1.0803e-03, PNorm = 154.6072, GNorm = 0.1219, lr_0 = 1.4095e-04
Loss = 4.7645e-03, PNorm = 154.6096, GNorm = 0.5539, lr_0 = 1.4086e-04
Loss = 9.3975e-04, PNorm = 154.6093, GNorm = 0.1088, lr_0 = 1.4076e-04
Loss = 1.3037e-03, PNorm = 154.6127, GNorm = 0.1669, lr_0 = 1.4066e-04
Loss = 9.4019e-04, PNorm = 154.6152, GNorm = 0.0997, lr_0 = 1.4057e-04
Loss = 8.5784e-04, PNorm = 154.6187, GNorm = 0.0385, lr_0 = 1.4047e-04
Loss = 1.0662e-03, PNorm = 154.6213, GNorm = 0.0498, lr_0 = 1.4038e-04
Loss = 1.2044e-03, PNorm = 154.6245, GNorm = 0.1973, lr_0 = 1.4028e-04
Loss = 1.5753e-03, PNorm = 154.6283, GNorm = 0.0991, lr_0 = 1.4018e-04
Loss = 1.5460e-03, PNorm = 154.6298, GNorm = 0.0958, lr_0 = 1.4009e-04
Loss = 2.3591e-03, PNorm = 154.6322, GNorm = 0.1421, lr_0 = 1.3999e-04
Loss = 1.7407e-03, PNorm = 154.6334, GNorm = 0.1168, lr_0 = 1.3990e-04
Loss = 8.6837e-04, PNorm = 154.6355, GNorm = 0.0992, lr_0 = 1.3980e-04
Loss = 9.2094e-04, PNorm = 154.6387, GNorm = 0.0860, lr_0 = 1.3970e-04
Loss = 3.9943e-03, PNorm = 154.6405, GNorm = 0.3422, lr_0 = 1.3961e-04
Loss = 1.7818e-03, PNorm = 154.6437, GNorm = 0.0570, lr_0 = 1.3951e-04
Loss = 7.4117e-04, PNorm = 154.6470, GNorm = 0.0682, lr_0 = 1.3942e-04
Loss = 1.7316e-03, PNorm = 154.6506, GNorm = 0.1428, lr_0 = 1.3932e-04
Loss = 9.5004e-04, PNorm = 154.6513, GNorm = 0.0892, lr_0 = 1.3923e-04
Loss = 1.7430e-03, PNorm = 154.6528, GNorm = 0.0521, lr_0 = 1.3913e-04
Loss = 1.8216e-03, PNorm = 154.6544, GNorm = 0.0706, lr_0 = 1.3904e-04
Loss = 7.1542e-04, PNorm = 154.6563, GNorm = 0.1225, lr_0 = 1.3894e-04
Validation mae = 0.474584
Epoch 26
Loss = 2.6046e-03, PNorm = 154.6591, GNorm = 0.0928, lr_0 = 1.3884e-04
Loss = 7.9482e-04, PNorm = 154.6607, GNorm = 0.0391, lr_0 = 1.3875e-04
Loss = 7.3554e-04, PNorm = 154.6610, GNorm = 0.0977, lr_0 = 1.3865e-04
Loss = 7.2431e-04, PNorm = 154.6605, GNorm = 0.0635, lr_0 = 1.3856e-04
Loss = 1.6903e-03, PNorm = 154.6604, GNorm = 0.0491, lr_0 = 1.3846e-04
Loss = 7.5681e-04, PNorm = 154.6615, GNorm = 0.0743, lr_0 = 1.3837e-04
Loss = 9.0669e-04, PNorm = 154.6638, GNorm = 0.0649, lr_0 = 1.3828e-04
Loss = 8.1724e-04, PNorm = 154.6663, GNorm = 0.0456, lr_0 = 1.3818e-04
Loss = 1.4280e-03, PNorm = 154.6686, GNorm = 0.0798, lr_0 = 1.3809e-04
Loss = 7.2896e-04, PNorm = 154.6696, GNorm = 0.0953, lr_0 = 1.3799e-04
Loss = 1.2256e-03, PNorm = 154.6696, GNorm = 0.0944, lr_0 = 1.3790e-04
Loss = 6.9493e-04, PNorm = 154.6694, GNorm = 0.0346, lr_0 = 1.3780e-04
Loss = 2.1026e-03, PNorm = 154.6695, GNorm = 0.0672, lr_0 = 1.3771e-04
Loss = 1.4472e-03, PNorm = 154.6716, GNorm = 0.0835, lr_0 = 1.3761e-04
Loss = 1.2239e-03, PNorm = 154.6738, GNorm = 0.0926, lr_0 = 1.3752e-04
Loss = 2.0585e-03, PNorm = 154.6770, GNorm = 0.0594, lr_0 = 1.3742e-04
Loss = 6.9793e-04, PNorm = 154.6784, GNorm = 0.0575, lr_0 = 1.3733e-04
Loss = 9.6177e-04, PNorm = 154.6818, GNorm = 0.0935, lr_0 = 1.3724e-04
Loss = 1.1043e-03, PNorm = 154.6821, GNorm = 0.2973, lr_0 = 1.3714e-04
Loss = 1.0085e-03, PNorm = 154.6834, GNorm = 0.1514, lr_0 = 1.3705e-04
Loss = 7.8512e-04, PNorm = 154.6856, GNorm = 0.1700, lr_0 = 1.3695e-04
Loss = 1.4384e-03, PNorm = 154.6876, GNorm = 0.2273, lr_0 = 1.3686e-04
Loss = 7.9999e-04, PNorm = 154.6892, GNorm = 0.1036, lr_0 = 1.3677e-04
Loss = 1.6192e-03, PNorm = 154.6924, GNorm = 0.1584, lr_0 = 1.3667e-04
Loss = 8.7251e-04, PNorm = 154.6941, GNorm = 0.1050, lr_0 = 1.3658e-04
Loss = 3.4298e-03, PNorm = 154.6979, GNorm = 0.1354, lr_0 = 1.3649e-04
Loss = 8.0351e-04, PNorm = 154.7000, GNorm = 0.0986, lr_0 = 1.3639e-04
Loss = 9.8937e-04, PNorm = 154.7021, GNorm = 0.0459, lr_0 = 1.3630e-04
Loss = 8.4277e-04, PNorm = 154.7028, GNorm = 0.0732, lr_0 = 1.3621e-04
Loss = 1.7153e-03, PNorm = 154.7036, GNorm = 0.1529, lr_0 = 1.3611e-04
Loss = 8.8605e-04, PNorm = 154.7049, GNorm = 0.1672, lr_0 = 1.3602e-04
Loss = 1.5633e-03, PNorm = 154.7076, GNorm = 0.0915, lr_0 = 1.3593e-04
Loss = 7.6820e-04, PNorm = 154.7095, GNorm = 0.1298, lr_0 = 1.3583e-04
Loss = 9.8843e-04, PNorm = 154.7108, GNorm = 0.0752, lr_0 = 1.3574e-04
Loss = 2.0929e-03, PNorm = 154.7123, GNorm = 0.0778, lr_0 = 1.3565e-04
Loss = 7.7417e-04, PNorm = 154.7145, GNorm = 0.0986, lr_0 = 1.3555e-04
Loss = 1.2154e-03, PNorm = 154.7148, GNorm = 0.0891, lr_0 = 1.3546e-04
Loss = 1.8867e-03, PNorm = 154.7158, GNorm = 0.0976, lr_0 = 1.3537e-04
Loss = 1.0872e-03, PNorm = 154.7163, GNorm = 0.0793, lr_0 = 1.3528e-04
Loss = 1.6144e-03, PNorm = 154.7177, GNorm = 0.1255, lr_0 = 1.3518e-04
Loss = 9.6453e-04, PNorm = 154.7193, GNorm = 0.0348, lr_0 = 1.3509e-04
Loss = 1.2089e-03, PNorm = 154.7215, GNorm = 0.1410, lr_0 = 1.3500e-04
Loss = 3.0888e-03, PNorm = 154.7231, GNorm = 0.1960, lr_0 = 1.3491e-04
Loss = 2.6322e-03, PNorm = 154.7236, GNorm = 0.0612, lr_0 = 1.3481e-04
Loss = 1.2580e-03, PNorm = 154.7254, GNorm = 0.0682, lr_0 = 1.3472e-04
Loss = 1.4672e-03, PNorm = 154.7277, GNorm = 0.0655, lr_0 = 1.3463e-04
Loss = 9.3428e-04, PNorm = 154.7302, GNorm = 0.0483, lr_0 = 1.3454e-04
Loss = 8.5996e-04, PNorm = 154.7332, GNorm = 0.0702, lr_0 = 1.3444e-04
Loss = 1.8899e-03, PNorm = 154.7331, GNorm = 0.0740, lr_0 = 1.3435e-04
Loss = 7.0602e-04, PNorm = 154.7345, GNorm = 0.0755, lr_0 = 1.3426e-04
Loss = 1.5292e-03, PNorm = 154.7350, GNorm = 0.0277, lr_0 = 1.3417e-04
Loss = 1.0210e-03, PNorm = 154.7372, GNorm = 0.1112, lr_0 = 1.3408e-04
Loss = 6.8603e-04, PNorm = 154.7407, GNorm = 0.1374, lr_0 = 1.3398e-04
Loss = 6.8195e-04, PNorm = 154.7425, GNorm = 0.0948, lr_0 = 1.3389e-04
Loss = 1.5927e-03, PNorm = 154.7426, GNorm = 0.3219, lr_0 = 1.3380e-04
Loss = 1.2934e-03, PNorm = 154.7445, GNorm = 0.0813, lr_0 = 1.3371e-04
Loss = 9.5085e-04, PNorm = 154.7466, GNorm = 0.0659, lr_0 = 1.3362e-04
Loss = 9.1841e-04, PNorm = 154.7493, GNorm = 0.0664, lr_0 = 1.3353e-04
Loss = 4.4737e-03, PNorm = 154.7501, GNorm = 0.3012, lr_0 = 1.3343e-04
Loss = 1.5415e-03, PNorm = 154.7520, GNorm = 0.2030, lr_0 = 1.3334e-04
Loss = 1.1478e-03, PNorm = 154.7541, GNorm = 0.0276, lr_0 = 1.3325e-04
Loss = 1.6362e-03, PNorm = 154.7564, GNorm = 0.2915, lr_0 = 1.3316e-04
Loss = 1.8136e-03, PNorm = 154.7588, GNorm = 0.2820, lr_0 = 1.3307e-04
Loss = 2.0850e-03, PNorm = 154.7600, GNorm = 0.0995, lr_0 = 1.3298e-04
Loss = 9.5209e-04, PNorm = 154.7623, GNorm = 0.1149, lr_0 = 1.3289e-04
Loss = 8.5826e-04, PNorm = 154.7650, GNorm = 0.0269, lr_0 = 1.3280e-04
Loss = 7.4016e-04, PNorm = 154.7668, GNorm = 0.0745, lr_0 = 1.3270e-04
Loss = 1.7109e-03, PNorm = 154.7670, GNorm = 0.0640, lr_0 = 1.3261e-04
Loss = 3.8545e-03, PNorm = 154.7685, GNorm = 0.0890, lr_0 = 1.3252e-04
Loss = 2.8709e-03, PNorm = 154.7692, GNorm = 0.0332, lr_0 = 1.3243e-04
Loss = 8.7066e-04, PNorm = 154.7739, GNorm = 0.0950, lr_0 = 1.3234e-04
Loss = 2.6103e-03, PNorm = 154.7743, GNorm = 0.1144, lr_0 = 1.3225e-04
Loss = 2.8817e-03, PNorm = 154.7741, GNorm = 0.0717, lr_0 = 1.3216e-04
Loss = 9.8456e-04, PNorm = 154.7753, GNorm = 0.0713, lr_0 = 1.3207e-04
Loss = 1.9526e-03, PNorm = 154.7774, GNorm = 0.0332, lr_0 = 1.3198e-04
Loss = 6.7061e-04, PNorm = 154.7791, GNorm = 0.1591, lr_0 = 1.3189e-04
Loss = 5.8353e-04, PNorm = 154.7824, GNorm = 0.0468, lr_0 = 1.3180e-04
Loss = 1.5480e-03, PNorm = 154.7841, GNorm = 0.3281, lr_0 = 1.3171e-04
Loss = 9.0647e-04, PNorm = 154.7868, GNorm = 0.0605, lr_0 = 1.3162e-04
Loss = 2.7513e-03, PNorm = 154.7865, GNorm = 0.0691, lr_0 = 1.3153e-04
Loss = 8.0108e-04, PNorm = 154.7872, GNorm = 0.0902, lr_0 = 1.3144e-04
Loss = 1.7000e-03, PNorm = 154.7884, GNorm = 0.1613, lr_0 = 1.3135e-04
Loss = 9.1715e-04, PNorm = 154.7912, GNorm = 0.0355, lr_0 = 1.3126e-04
Loss = 7.8347e-04, PNorm = 154.7918, GNorm = 0.1172, lr_0 = 1.3117e-04
Loss = 9.4086e-04, PNorm = 154.7919, GNorm = 0.1119, lr_0 = 1.3108e-04
Loss = 7.9505e-04, PNorm = 154.7936, GNorm = 0.1097, lr_0 = 1.3099e-04
Loss = 1.2093e-03, PNorm = 154.7950, GNorm = 0.0396, lr_0 = 1.3090e-04
Loss = 1.6319e-03, PNorm = 154.7961, GNorm = 0.1594, lr_0 = 1.3081e-04
Loss = 1.5616e-03, PNorm = 154.7982, GNorm = 0.0687, lr_0 = 1.3072e-04
Loss = 7.2244e-04, PNorm = 154.7996, GNorm = 0.1475, lr_0 = 1.3063e-04
Loss = 1.7614e-03, PNorm = 154.8011, GNorm = 0.0809, lr_0 = 1.3054e-04
Loss = 2.5219e-03, PNorm = 154.8029, GNorm = 0.1026, lr_0 = 1.3045e-04
Loss = 7.7548e-04, PNorm = 154.8039, GNorm = 0.0488, lr_0 = 1.3036e-04
Loss = 1.9824e-03, PNorm = 154.8052, GNorm = 0.1442, lr_0 = 1.3027e-04
Loss = 8.0125e-04, PNorm = 154.8074, GNorm = 0.1334, lr_0 = 1.3018e-04
Loss = 1.8310e-03, PNorm = 154.8086, GNorm = 0.0634, lr_0 = 1.3009e-04
Loss = 1.1240e-03, PNorm = 154.8103, GNorm = 0.0663, lr_0 = 1.3000e-04
Loss = 6.8660e-04, PNorm = 154.8127, GNorm = 0.0546, lr_0 = 1.2992e-04
Loss = 1.2877e-03, PNorm = 154.8150, GNorm = 0.0699, lr_0 = 1.2983e-04
Loss = 1.1088e-03, PNorm = 154.8173, GNorm = 0.0560, lr_0 = 1.2974e-04
Loss = 1.4854e-03, PNorm = 154.8205, GNorm = 0.1371, lr_0 = 1.2965e-04
Loss = 1.3944e-03, PNorm = 154.8232, GNorm = 0.0945, lr_0 = 1.2956e-04
Loss = 7.5608e-04, PNorm = 154.8245, GNorm = 0.0522, lr_0 = 1.2947e-04
Loss = 1.2693e-03, PNorm = 154.8251, GNorm = 0.0518, lr_0 = 1.2938e-04
Loss = 1.2993e-03, PNorm = 154.8251, GNorm = 0.0294, lr_0 = 1.2929e-04
Loss = 1.8551e-03, PNorm = 154.8294, GNorm = 0.0805, lr_0 = 1.2921e-04
Loss = 1.7404e-03, PNorm = 154.8329, GNorm = 0.0609, lr_0 = 1.2912e-04
Loss = 8.9267e-04, PNorm = 154.8362, GNorm = 0.0662, lr_0 = 1.2903e-04
Loss = 1.8519e-03, PNorm = 154.8362, GNorm = 0.2936, lr_0 = 1.2894e-04
Loss = 1.6261e-03, PNorm = 154.8371, GNorm = 0.0356, lr_0 = 1.2885e-04
Loss = 7.7460e-04, PNorm = 154.8388, GNorm = 0.0650, lr_0 = 1.2876e-04
Loss = 7.3008e-04, PNorm = 154.8396, GNorm = 0.1130, lr_0 = 1.2867e-04
Loss = 8.3532e-04, PNorm = 154.8410, GNorm = 0.0584, lr_0 = 1.2859e-04
Loss = 1.7709e-03, PNorm = 154.8423, GNorm = 0.0450, lr_0 = 1.2850e-04
Loss = 1.0608e-03, PNorm = 154.8436, GNorm = 0.0582, lr_0 = 1.2841e-04
Loss = 2.0537e-03, PNorm = 154.8456, GNorm = 0.1865, lr_0 = 1.2832e-04
Loss = 8.7177e-04, PNorm = 154.8456, GNorm = 0.1121, lr_0 = 1.2823e-04
Loss = 1.7199e-03, PNorm = 154.8471, GNorm = 0.0736, lr_0 = 1.2815e-04
Loss = 1.3167e-03, PNorm = 154.8486, GNorm = 0.0405, lr_0 = 1.2806e-04
Loss = 4.0402e-03, PNorm = 154.8504, GNorm = 0.0271, lr_0 = 1.2797e-04
Validation mae = 0.474975
Epoch 27
Loss = 8.8878e-04, PNorm = 154.8520, GNorm = 0.0809, lr_0 = 1.2788e-04
Loss = 7.0950e-04, PNorm = 154.8540, GNorm = 0.0713, lr_0 = 1.2780e-04
Loss = 6.7771e-04, PNorm = 154.8540, GNorm = 0.1187, lr_0 = 1.2771e-04
Loss = 1.1316e-03, PNorm = 154.8553, GNorm = 0.0615, lr_0 = 1.2762e-04
Loss = 1.3291e-03, PNorm = 154.8561, GNorm = 0.1597, lr_0 = 1.2753e-04
Loss = 1.1302e-03, PNorm = 154.8561, GNorm = 0.2126, lr_0 = 1.2745e-04
Loss = 1.2469e-03, PNorm = 154.8565, GNorm = 0.1142, lr_0 = 1.2736e-04
Loss = 8.4730e-04, PNorm = 154.8591, GNorm = 0.1060, lr_0 = 1.2727e-04
Loss = 6.0393e-04, PNorm = 154.8613, GNorm = 0.0303, lr_0 = 1.2718e-04
Loss = 6.9255e-04, PNorm = 154.8632, GNorm = 0.0861, lr_0 = 1.2710e-04
Loss = 8.3571e-04, PNorm = 154.8647, GNorm = 0.0362, lr_0 = 1.2701e-04
Loss = 5.9802e-04, PNorm = 154.8651, GNorm = 0.0293, lr_0 = 1.2692e-04
Loss = 1.0006e-03, PNorm = 154.8666, GNorm = 0.1071, lr_0 = 1.2684e-04
Loss = 1.0244e-03, PNorm = 154.8705, GNorm = 0.0695, lr_0 = 1.2675e-04
Loss = 1.1617e-03, PNorm = 154.8738, GNorm = 0.1189, lr_0 = 1.2666e-04
Loss = 1.1328e-03, PNorm = 154.8738, GNorm = 0.0435, lr_0 = 1.2658e-04
Loss = 1.2060e-03, PNorm = 154.8742, GNorm = 0.0978, lr_0 = 1.2649e-04
Loss = 9.9850e-04, PNorm = 154.8759, GNorm = 0.0733, lr_0 = 1.2640e-04
Loss = 3.8114e-03, PNorm = 154.8760, GNorm = 0.0380, lr_0 = 1.2632e-04
Loss = 2.5954e-03, PNorm = 154.8758, GNorm = 0.2121, lr_0 = 1.2623e-04
Loss = 1.3638e-03, PNorm = 154.8755, GNorm = 0.0949, lr_0 = 1.2614e-04
Loss = 1.4507e-03, PNorm = 154.8775, GNorm = 0.0729, lr_0 = 1.2606e-04
Loss = 1.1850e-03, PNorm = 154.8787, GNorm = 0.1159, lr_0 = 1.2597e-04
Loss = 7.6806e-04, PNorm = 154.8792, GNorm = 0.1280, lr_0 = 1.2588e-04
Loss = 2.5383e-03, PNorm = 154.8814, GNorm = 0.0428, lr_0 = 1.2580e-04
Loss = 9.3650e-04, PNorm = 154.8840, GNorm = 0.0465, lr_0 = 1.2571e-04
Loss = 7.3557e-04, PNorm = 154.8870, GNorm = 0.1350, lr_0 = 1.2563e-04
Loss = 2.4086e-03, PNorm = 154.8879, GNorm = 0.2523, lr_0 = 1.2554e-04
Loss = 1.0143e-03, PNorm = 154.8891, GNorm = 0.0926, lr_0 = 1.2545e-04
Loss = 7.2690e-04, PNorm = 154.8891, GNorm = 0.1112, lr_0 = 1.2537e-04
Loss = 6.8605e-04, PNorm = 154.8901, GNorm = 0.1225, lr_0 = 1.2528e-04
Loss = 1.2795e-03, PNorm = 154.8894, GNorm = 0.1524, lr_0 = 1.2520e-04
Loss = 6.2952e-04, PNorm = 154.8932, GNorm = 0.0927, lr_0 = 1.2511e-04
Loss = 6.9621e-04, PNorm = 154.8960, GNorm = 0.0519, lr_0 = 1.2502e-04
Loss = 6.4188e-04, PNorm = 154.8986, GNorm = 0.0675, lr_0 = 1.2494e-04
Loss = 1.2489e-03, PNorm = 154.8996, GNorm = 0.0252, lr_0 = 1.2485e-04
Loss = 1.5015e-03, PNorm = 154.9012, GNorm = 0.1977, lr_0 = 1.2477e-04
Loss = 6.5283e-04, PNorm = 154.9015, GNorm = 0.0733, lr_0 = 1.2468e-04
Loss = 1.0948e-03, PNorm = 154.9031, GNorm = 0.1274, lr_0 = 1.2460e-04
Loss = 7.5230e-04, PNorm = 154.9044, GNorm = 0.0904, lr_0 = 1.2451e-04
Loss = 8.9459e-04, PNorm = 154.9056, GNorm = 0.1170, lr_0 = 1.2443e-04
Loss = 1.5087e-03, PNorm = 154.9071, GNorm = 0.0729, lr_0 = 1.2434e-04
Loss = 5.4420e-04, PNorm = 154.9085, GNorm = 0.0718, lr_0 = 1.2426e-04
Loss = 6.2484e-04, PNorm = 154.9110, GNorm = 0.1013, lr_0 = 1.2417e-04
Loss = 6.2109e-04, PNorm = 154.9121, GNorm = 0.0452, lr_0 = 1.2409e-04
Loss = 6.0834e-04, PNorm = 154.9130, GNorm = 0.0833, lr_0 = 1.2400e-04
Loss = 6.0322e-04, PNorm = 154.9134, GNorm = 0.1140, lr_0 = 1.2392e-04
Loss = 9.8537e-04, PNorm = 154.9158, GNorm = 0.0673, lr_0 = 1.2383e-04
Loss = 8.5482e-04, PNorm = 154.9177, GNorm = 0.0518, lr_0 = 1.2375e-04
Loss = 3.5983e-03, PNorm = 154.9171, GNorm = 0.1657, lr_0 = 1.2366e-04
Loss = 2.8736e-03, PNorm = 154.9207, GNorm = 0.1143, lr_0 = 1.2358e-04
Loss = 1.5709e-03, PNorm = 154.9222, GNorm = 0.1624, lr_0 = 1.2349e-04
Loss = 1.7519e-03, PNorm = 154.9258, GNorm = 0.0866, lr_0 = 1.2341e-04
Loss = 1.3697e-03, PNorm = 154.9274, GNorm = 0.0966, lr_0 = 1.2332e-04
Loss = 6.0852e-04, PNorm = 154.9293, GNorm = 0.0402, lr_0 = 1.2324e-04
Loss = 9.4018e-04, PNorm = 154.9316, GNorm = 0.1530, lr_0 = 1.2315e-04
Loss = 9.9824e-04, PNorm = 154.9336, GNorm = 0.1218, lr_0 = 1.2307e-04
Loss = 1.8919e-03, PNorm = 154.9349, GNorm = 0.0862, lr_0 = 1.2298e-04
Loss = 1.0092e-03, PNorm = 154.9354, GNorm = 0.0728, lr_0 = 1.2290e-04
Loss = 1.5947e-03, PNorm = 154.9354, GNorm = 0.1329, lr_0 = 1.2282e-04
Loss = 6.2046e-04, PNorm = 154.9360, GNorm = 0.1408, lr_0 = 1.2273e-04
Loss = 1.2535e-03, PNorm = 154.9397, GNorm = 0.0615, lr_0 = 1.2265e-04
Loss = 1.7059e-03, PNorm = 154.9430, GNorm = 0.1749, lr_0 = 1.2256e-04
Loss = 6.1459e-04, PNorm = 154.9443, GNorm = 0.0637, lr_0 = 1.2248e-04
Loss = 9.6318e-04, PNorm = 154.9454, GNorm = 0.0267, lr_0 = 1.2240e-04
Loss = 1.1994e-03, PNorm = 154.9472, GNorm = 0.2097, lr_0 = 1.2231e-04
Loss = 6.8133e-04, PNorm = 154.9486, GNorm = 0.0526, lr_0 = 1.2223e-04
Loss = 2.4907e-03, PNorm = 154.9489, GNorm = 0.0835, lr_0 = 1.2214e-04
Loss = 6.3739e-04, PNorm = 154.9499, GNorm = 0.2006, lr_0 = 1.2206e-04
Loss = 6.1771e-04, PNorm = 154.9523, GNorm = 0.0384, lr_0 = 1.2198e-04
Loss = 1.7220e-03, PNorm = 154.9539, GNorm = 0.1249, lr_0 = 1.2189e-04
Loss = 2.2186e-03, PNorm = 154.9546, GNorm = 0.1242, lr_0 = 1.2181e-04
Loss = 2.0931e-03, PNorm = 154.9569, GNorm = 0.0205, lr_0 = 1.2173e-04
Loss = 8.0875e-04, PNorm = 154.9606, GNorm = 0.0442, lr_0 = 1.2164e-04
Loss = 1.2492e-03, PNorm = 154.9634, GNorm = 0.0472, lr_0 = 1.2156e-04
Loss = 1.1046e-03, PNorm = 154.9670, GNorm = 0.1239, lr_0 = 1.2148e-04
Loss = 8.0855e-04, PNorm = 154.9676, GNorm = 0.0841, lr_0 = 1.2139e-04
Loss = 2.8060e-03, PNorm = 154.9690, GNorm = 0.0793, lr_0 = 1.2131e-04
Loss = 1.4291e-03, PNorm = 154.9688, GNorm = 0.1136, lr_0 = 1.2123e-04
Loss = 5.8939e-04, PNorm = 154.9707, GNorm = 0.0765, lr_0 = 1.2114e-04
Loss = 3.2506e-03, PNorm = 154.9733, GNorm = 0.0894, lr_0 = 1.2106e-04
Loss = 6.3676e-04, PNorm = 154.9754, GNorm = 0.1498, lr_0 = 1.2098e-04
Loss = 2.2910e-03, PNorm = 154.9764, GNorm = 0.1226, lr_0 = 1.2090e-04
Loss = 1.1298e-03, PNorm = 154.9794, GNorm = 0.0888, lr_0 = 1.2081e-04
Loss = 9.1699e-04, PNorm = 154.9807, GNorm = 0.0336, lr_0 = 1.2073e-04
Loss = 2.5792e-03, PNorm = 154.9826, GNorm = 0.1896, lr_0 = 1.2065e-04
Loss = 7.8868e-04, PNorm = 154.9845, GNorm = 0.0892, lr_0 = 1.2056e-04
Loss = 2.7701e-03, PNorm = 154.9855, GNorm = 0.0887, lr_0 = 1.2048e-04
Loss = 1.6568e-03, PNorm = 154.9858, GNorm = 0.1630, lr_0 = 1.2040e-04
Loss = 8.7655e-04, PNorm = 154.9859, GNorm = 0.0703, lr_0 = 1.2032e-04
Loss = 9.1559e-04, PNorm = 154.9873, GNorm = 0.0295, lr_0 = 1.2023e-04
Loss = 2.9156e-03, PNorm = 154.9882, GNorm = 0.0298, lr_0 = 1.2015e-04
Loss = 1.2932e-03, PNorm = 154.9886, GNorm = 0.0536, lr_0 = 1.2007e-04
Loss = 7.4442e-04, PNorm = 154.9891, GNorm = 0.1732, lr_0 = 1.1999e-04
Loss = 1.6027e-03, PNorm = 154.9901, GNorm = 0.2695, lr_0 = 1.1991e-04
Loss = 1.0788e-03, PNorm = 154.9903, GNorm = 0.0630, lr_0 = 1.1982e-04
Loss = 1.3543e-03, PNorm = 154.9919, GNorm = 0.0759, lr_0 = 1.1974e-04
Loss = 1.1541e-03, PNorm = 154.9942, GNorm = 0.0236, lr_0 = 1.1966e-04
Loss = 9.5122e-04, PNorm = 154.9975, GNorm = 0.1086, lr_0 = 1.1958e-04
Loss = 1.9867e-03, PNorm = 154.9986, GNorm = 0.1220, lr_0 = 1.1950e-04
Loss = 7.6941e-04, PNorm = 155.0013, GNorm = 0.1990, lr_0 = 1.1941e-04
Loss = 1.6692e-03, PNorm = 155.0016, GNorm = 0.0618, lr_0 = 1.1933e-04
Loss = 2.3070e-03, PNorm = 155.0030, GNorm = 0.1965, lr_0 = 1.1925e-04
Loss = 8.0298e-04, PNorm = 155.0080, GNorm = 0.0782, lr_0 = 1.1917e-04
Loss = 9.3043e-04, PNorm = 155.0097, GNorm = 0.1022, lr_0 = 1.1909e-04
Loss = 1.3905e-03, PNorm = 155.0115, GNorm = 0.2672, lr_0 = 1.1901e-04
Loss = 1.0391e-03, PNorm = 155.0123, GNorm = 0.1131, lr_0 = 1.1892e-04
Loss = 1.1007e-03, PNorm = 155.0135, GNorm = 0.0698, lr_0 = 1.1884e-04
Loss = 3.3541e-03, PNorm = 155.0168, GNorm = 0.1515, lr_0 = 1.1876e-04
Loss = 6.1189e-04, PNorm = 155.0179, GNorm = 0.0627, lr_0 = 1.1868e-04
Loss = 1.3520e-03, PNorm = 155.0195, GNorm = 0.0428, lr_0 = 1.1860e-04
Loss = 1.1871e-03, PNorm = 155.0210, GNorm = 0.0531, lr_0 = 1.1852e-04
Loss = 1.0592e-03, PNorm = 155.0205, GNorm = 0.1635, lr_0 = 1.1844e-04
Loss = 8.8657e-04, PNorm = 155.0211, GNorm = 0.2682, lr_0 = 1.1835e-04
Loss = 7.6153e-04, PNorm = 155.0236, GNorm = 0.1403, lr_0 = 1.1827e-04
Loss = 1.4513e-03, PNorm = 155.0252, GNorm = 0.0573, lr_0 = 1.1819e-04
Loss = 1.1732e-03, PNorm = 155.0283, GNorm = 0.1323, lr_0 = 1.1811e-04
Loss = 1.8546e-03, PNorm = 155.0299, GNorm = 0.0470, lr_0 = 1.1803e-04
Loss = 6.3927e-04, PNorm = 155.0312, GNorm = 0.0348, lr_0 = 1.1795e-04
Loss = 1.1302e-03, PNorm = 155.0346, GNorm = 0.1020, lr_0 = 1.1787e-04
Validation mae = 0.474722
Epoch 28
Loss = 2.4115e-03, PNorm = 155.0358, GNorm = 0.0391, lr_0 = 1.1779e-04
Loss = 8.0273e-04, PNorm = 155.0361, GNorm = 0.0670, lr_0 = 1.1771e-04
Loss = 4.7935e-04, PNorm = 155.0362, GNorm = 0.0543, lr_0 = 1.1763e-04
Loss = 7.6766e-04, PNorm = 155.0377, GNorm = 0.1721, lr_0 = 1.1755e-04
Loss = 7.8684e-04, PNorm = 155.0392, GNorm = 0.0387, lr_0 = 1.1747e-04
Loss = 6.1743e-04, PNorm = 155.0412, GNorm = 0.1681, lr_0 = 1.1739e-04
Loss = 8.2063e-04, PNorm = 155.0421, GNorm = 0.0593, lr_0 = 1.1730e-04
Loss = 1.2241e-03, PNorm = 155.0425, GNorm = 0.0786, lr_0 = 1.1722e-04
Loss = 5.2066e-04, PNorm = 155.0418, GNorm = 0.1156, lr_0 = 1.1714e-04
Loss = 1.8622e-03, PNorm = 155.0446, GNorm = 0.1085, lr_0 = 1.1706e-04
Loss = 9.4750e-04, PNorm = 155.0452, GNorm = 0.1048, lr_0 = 1.1698e-04
Loss = 4.9705e-04, PNorm = 155.0459, GNorm = 0.0812, lr_0 = 1.1690e-04
Loss = 6.5831e-04, PNorm = 155.0463, GNorm = 0.1106, lr_0 = 1.1682e-04
Loss = 6.1071e-04, PNorm = 155.0471, GNorm = 0.1029, lr_0 = 1.1674e-04
Loss = 1.1871e-03, PNorm = 155.0480, GNorm = 0.0604, lr_0 = 1.1666e-04
Loss = 1.1100e-03, PNorm = 155.0496, GNorm = 0.2449, lr_0 = 1.1658e-04
Loss = 7.5277e-04, PNorm = 155.0503, GNorm = 0.0473, lr_0 = 1.1650e-04
Loss = 1.0577e-03, PNorm = 155.0513, GNorm = 0.0804, lr_0 = 1.1642e-04
Loss = 8.7266e-04, PNorm = 155.0529, GNorm = 0.1332, lr_0 = 1.1634e-04
Loss = 5.5296e-04, PNorm = 155.0542, GNorm = 0.1030, lr_0 = 1.1626e-04
Loss = 8.1381e-04, PNorm = 155.0547, GNorm = 0.1642, lr_0 = 1.1618e-04
Loss = 4.9188e-04, PNorm = 155.0556, GNorm = 0.0287, lr_0 = 1.1611e-04
Loss = 1.3767e-03, PNorm = 155.0568, GNorm = 0.1835, lr_0 = 1.1603e-04
Loss = 9.2807e-04, PNorm = 155.0584, GNorm = 0.0536, lr_0 = 1.1595e-04
Loss = 1.3967e-03, PNorm = 155.0587, GNorm = 0.1293, lr_0 = 1.1587e-04
Loss = 1.5166e-03, PNorm = 155.0593, GNorm = 0.0989, lr_0 = 1.1579e-04
Loss = 6.5505e-04, PNorm = 155.0609, GNorm = 0.1022, lr_0 = 1.1571e-04
Loss = 1.7859e-03, PNorm = 155.0622, GNorm = 0.1370, lr_0 = 1.1563e-04
Loss = 1.5166e-03, PNorm = 155.0623, GNorm = 0.0790, lr_0 = 1.1555e-04
Loss = 7.6907e-04, PNorm = 155.0645, GNorm = 0.0344, lr_0 = 1.1547e-04
Loss = 9.8379e-04, PNorm = 155.0673, GNorm = 0.0677, lr_0 = 1.1539e-04
Loss = 9.6097e-04, PNorm = 155.0677, GNorm = 0.0687, lr_0 = 1.1531e-04
Loss = 5.6516e-04, PNorm = 155.0697, GNorm = 0.0691, lr_0 = 1.1523e-04
Loss = 1.0455e-03, PNorm = 155.0699, GNorm = 0.1234, lr_0 = 1.1515e-04
Loss = 7.9200e-04, PNorm = 155.0693, GNorm = 0.0674, lr_0 = 1.1508e-04
Loss = 1.6068e-03, PNorm = 155.0688, GNorm = 0.0676, lr_0 = 1.1500e-04
Loss = 9.5719e-04, PNorm = 155.0693, GNorm = 0.0357, lr_0 = 1.1492e-04
Loss = 8.6384e-04, PNorm = 155.0696, GNorm = 0.0579, lr_0 = 1.1484e-04
Loss = 8.4920e-04, PNorm = 155.0709, GNorm = 0.0392, lr_0 = 1.1476e-04
Loss = 9.4074e-04, PNorm = 155.0720, GNorm = 0.0497, lr_0 = 1.1468e-04
Loss = 6.4263e-04, PNorm = 155.0744, GNorm = 0.1270, lr_0 = 1.1460e-04
Loss = 4.7466e-04, PNorm = 155.0758, GNorm = 0.0283, lr_0 = 1.1452e-04
Loss = 5.7828e-04, PNorm = 155.0771, GNorm = 0.0326, lr_0 = 1.1445e-04
Loss = 9.6101e-04, PNorm = 155.0789, GNorm = 0.0499, lr_0 = 1.1437e-04
Loss = 2.6517e-03, PNorm = 155.0788, GNorm = 0.0667, lr_0 = 1.1429e-04
Loss = 1.2474e-03, PNorm = 155.0794, GNorm = 0.0943, lr_0 = 1.1421e-04
Loss = 1.2837e-03, PNorm = 155.0801, GNorm = 0.0768, lr_0 = 1.1413e-04
Loss = 6.5789e-04, PNorm = 155.0821, GNorm = 0.0402, lr_0 = 1.1405e-04
Loss = 5.7799e-04, PNorm = 155.0829, GNorm = 0.1480, lr_0 = 1.1398e-04
Loss = 1.7537e-03, PNorm = 155.0852, GNorm = 0.0773, lr_0 = 1.1390e-04
Loss = 8.1173e-04, PNorm = 155.0857, GNorm = 0.0631, lr_0 = 1.1382e-04
Loss = 7.2973e-04, PNorm = 155.0873, GNorm = 0.0973, lr_0 = 1.1374e-04
Loss = 1.1869e-03, PNorm = 155.0890, GNorm = 0.0506, lr_0 = 1.1366e-04
Loss = 4.6021e-04, PNorm = 155.0892, GNorm = 0.0344, lr_0 = 1.1359e-04
Loss = 5.7643e-04, PNorm = 155.0913, GNorm = 0.0924, lr_0 = 1.1351e-04
Loss = 6.1669e-04, PNorm = 155.0920, GNorm = 0.0927, lr_0 = 1.1343e-04
Loss = 9.9307e-04, PNorm = 155.0934, GNorm = 0.1408, lr_0 = 1.1335e-04
Loss = 1.0328e-03, PNorm = 155.0949, GNorm = 0.1324, lr_0 = 1.1328e-04
Loss = 6.4506e-04, PNorm = 155.0950, GNorm = 0.0922, lr_0 = 1.1320e-04
Loss = 1.0723e-03, PNorm = 155.0975, GNorm = 0.0676, lr_0 = 1.1312e-04
Loss = 5.7762e-04, PNorm = 155.0982, GNorm = 0.0306, lr_0 = 1.1304e-04
Loss = 7.6817e-04, PNorm = 155.0992, GNorm = 0.1153, lr_0 = 1.1297e-04
Loss = 5.3940e-03, PNorm = 155.1002, GNorm = 0.1277, lr_0 = 1.1289e-04
Loss = 9.2962e-04, PNorm = 155.1026, GNorm = 0.0343, lr_0 = 1.1281e-04
Loss = 6.5794e-04, PNorm = 155.1032, GNorm = 0.0859, lr_0 = 1.1273e-04
Loss = 7.7998e-04, PNorm = 155.1049, GNorm = 0.0405, lr_0 = 1.1266e-04
Loss = 1.0078e-03, PNorm = 155.1057, GNorm = 0.0588, lr_0 = 1.1258e-04
Loss = 6.4667e-04, PNorm = 155.1052, GNorm = 0.1063, lr_0 = 1.1250e-04
Loss = 4.6101e-04, PNorm = 155.1064, GNorm = 0.0511, lr_0 = 1.1243e-04
Loss = 5.9758e-04, PNorm = 155.1081, GNorm = 0.0356, lr_0 = 1.1235e-04
Loss = 1.5706e-03, PNorm = 155.1097, GNorm = 0.1410, lr_0 = 1.1227e-04
Loss = 8.6235e-04, PNorm = 155.1111, GNorm = 0.0407, lr_0 = 1.1219e-04
Loss = 2.6803e-03, PNorm = 155.1123, GNorm = 0.4254, lr_0 = 1.1212e-04
Loss = 3.1624e-03, PNorm = 155.1122, GNorm = 0.0383, lr_0 = 1.1204e-04
Loss = 9.0132e-04, PNorm = 155.1122, GNorm = 0.1502, lr_0 = 1.1196e-04
Loss = 7.9307e-04, PNorm = 155.1130, GNorm = 0.0824, lr_0 = 1.1189e-04
Loss = 4.3329e-04, PNorm = 155.1153, GNorm = 0.0236, lr_0 = 1.1181e-04
Loss = 1.0462e-03, PNorm = 155.1168, GNorm = 0.0514, lr_0 = 1.1173e-04
Loss = 9.8372e-04, PNorm = 155.1185, GNorm = 0.1882, lr_0 = 1.1166e-04
Loss = 7.3580e-04, PNorm = 155.1207, GNorm = 0.0608, lr_0 = 1.1158e-04
Loss = 8.1055e-04, PNorm = 155.1204, GNorm = 0.1310, lr_0 = 1.1150e-04
Loss = 8.8918e-04, PNorm = 155.1208, GNorm = 0.0896, lr_0 = 1.1143e-04
Loss = 5.4151e-04, PNorm = 155.1222, GNorm = 0.0287, lr_0 = 1.1135e-04
Loss = 1.4001e-03, PNorm = 155.1226, GNorm = 0.0947, lr_0 = 1.1128e-04
Loss = 2.5595e-03, PNorm = 155.1234, GNorm = 0.1037, lr_0 = 1.1120e-04
Loss = 2.8139e-03, PNorm = 155.1256, GNorm = 0.0613, lr_0 = 1.1112e-04
Loss = 3.8675e-03, PNorm = 155.1275, GNorm = 0.2871, lr_0 = 1.1105e-04
Loss = 6.8841e-04, PNorm = 155.1298, GNorm = 0.0415, lr_0 = 1.1097e-04
Loss = 7.5664e-04, PNorm = 155.1315, GNorm = 0.1113, lr_0 = 1.1089e-04
Loss = 7.1558e-04, PNorm = 155.1320, GNorm = 0.0250, lr_0 = 1.1082e-04
Loss = 6.0201e-04, PNorm = 155.1329, GNorm = 0.0613, lr_0 = 1.1074e-04
Loss = 1.0078e-03, PNorm = 155.1329, GNorm = 0.0656, lr_0 = 1.1067e-04
Loss = 6.7863e-04, PNorm = 155.1346, GNorm = 0.0647, lr_0 = 1.1059e-04
Loss = 4.9190e-04, PNorm = 155.1364, GNorm = 0.0291, lr_0 = 1.1052e-04
Loss = 1.9278e-03, PNorm = 155.1368, GNorm = 0.0453, lr_0 = 1.1044e-04
Loss = 1.3001e-03, PNorm = 155.1378, GNorm = 0.0734, lr_0 = 1.1036e-04
Loss = 2.4071e-03, PNorm = 155.1409, GNorm = 0.1151, lr_0 = 1.1029e-04
Loss = 8.6840e-04, PNorm = 155.1416, GNorm = 0.0529, lr_0 = 1.1021e-04
Loss = 5.9256e-04, PNorm = 155.1440, GNorm = 0.1158, lr_0 = 1.1014e-04
Loss = 6.1970e-04, PNorm = 155.1444, GNorm = 0.0538, lr_0 = 1.1006e-04
Loss = 4.6013e-03, PNorm = 155.1468, GNorm = 0.1139, lr_0 = 1.0999e-04
Loss = 9.1658e-04, PNorm = 155.1480, GNorm = 0.2628, lr_0 = 1.0991e-04
Loss = 7.6847e-04, PNorm = 155.1506, GNorm = 0.0357, lr_0 = 1.0984e-04
Loss = 7.5722e-04, PNorm = 155.1527, GNorm = 0.0273, lr_0 = 1.0976e-04
Loss = 1.2075e-03, PNorm = 155.1541, GNorm = 0.1676, lr_0 = 1.0969e-04
Loss = 1.4973e-03, PNorm = 155.1545, GNorm = 0.1280, lr_0 = 1.0961e-04
Loss = 1.4020e-03, PNorm = 155.1555, GNorm = 0.1226, lr_0 = 1.0954e-04
Loss = 1.1030e-03, PNorm = 155.1565, GNorm = 0.0829, lr_0 = 1.0946e-04
Loss = 9.8545e-04, PNorm = 155.1585, GNorm = 0.0603, lr_0 = 1.0939e-04
Loss = 1.1592e-03, PNorm = 155.1584, GNorm = 0.0196, lr_0 = 1.0931e-04
Loss = 2.1356e-03, PNorm = 155.1593, GNorm = 0.1240, lr_0 = 1.0924e-04
Loss = 1.2739e-03, PNorm = 155.1615, GNorm = 0.0347, lr_0 = 1.0916e-04
Loss = 5.9829e-04, PNorm = 155.1636, GNorm = 0.0987, lr_0 = 1.0909e-04
Loss = 1.6599e-03, PNorm = 155.1664, GNorm = 0.1438, lr_0 = 1.0901e-04
Loss = 7.7605e-04, PNorm = 155.1677, GNorm = 0.1211, lr_0 = 1.0894e-04
Loss = 3.5412e-03, PNorm = 155.1699, GNorm = 0.2143, lr_0 = 1.0886e-04
Loss = 1.9485e-03, PNorm = 155.1716, GNorm = 0.1075, lr_0 = 1.0879e-04
Loss = 1.0537e-03, PNorm = 155.1730, GNorm = 0.0352, lr_0 = 1.0871e-04
Loss = 3.3243e-03, PNorm = 155.1743, GNorm = 0.0649, lr_0 = 1.0864e-04
Loss = 2.3479e-03, PNorm = 155.1747, GNorm = 0.1280, lr_0 = 1.0856e-04
Validation mae = 0.474335
Epoch 29
Loss = 5.0395e-04, PNorm = 155.1771, GNorm = 0.1406, lr_0 = 1.0849e-04
Loss = 2.1895e-03, PNorm = 155.1763, GNorm = 0.1161, lr_0 = 1.0841e-04
Loss = 5.3895e-04, PNorm = 155.1771, GNorm = 0.0996, lr_0 = 1.0834e-04
Loss = 8.7279e-04, PNorm = 155.1782, GNorm = 0.0509, lr_0 = 1.0827e-04
Loss = 5.9790e-04, PNorm = 155.1803, GNorm = 0.0277, lr_0 = 1.0819e-04
Loss = 3.9276e-04, PNorm = 155.1816, GNorm = 0.0906, lr_0 = 1.0812e-04
Loss = 1.3446e-03, PNorm = 155.1841, GNorm = 0.1125, lr_0 = 1.0804e-04
Loss = 2.1768e-03, PNorm = 155.1859, GNorm = 0.3855, lr_0 = 1.0797e-04
Loss = 9.9593e-04, PNorm = 155.1897, GNorm = 0.1233, lr_0 = 1.0790e-04
Loss = 1.7049e-03, PNorm = 155.1902, GNorm = 0.1193, lr_0 = 1.0782e-04
Loss = 5.6919e-04, PNorm = 155.1900, GNorm = 0.1404, lr_0 = 1.0775e-04
Loss = 5.0010e-04, PNorm = 155.1897, GNorm = 0.0377, lr_0 = 1.0767e-04
Loss = 4.5674e-04, PNorm = 155.1902, GNorm = 0.0402, lr_0 = 1.0760e-04
Loss = 4.0540e-04, PNorm = 155.1910, GNorm = 0.0666, lr_0 = 1.0753e-04
Loss = 1.2476e-03, PNorm = 155.1912, GNorm = 0.1062, lr_0 = 1.0745e-04
Loss = 1.0093e-03, PNorm = 155.1928, GNorm = 0.0472, lr_0 = 1.0738e-04
Loss = 4.8472e-04, PNorm = 155.1946, GNorm = 0.0167, lr_0 = 1.0731e-04
Loss = 5.5621e-04, PNorm = 155.1960, GNorm = 0.0945, lr_0 = 1.0723e-04
Loss = 1.7427e-03, PNorm = 155.1958, GNorm = 0.0964, lr_0 = 1.0716e-04
Loss = 5.7968e-04, PNorm = 155.1960, GNorm = 0.0579, lr_0 = 1.0709e-04
Loss = 1.1492e-03, PNorm = 155.1959, GNorm = 0.0455, lr_0 = 1.0701e-04
Loss = 5.4615e-04, PNorm = 155.1956, GNorm = 0.1437, lr_0 = 1.0694e-04
Loss = 1.5678e-03, PNorm = 155.1961, GNorm = 0.1263, lr_0 = 1.0687e-04
Loss = 2.1080e-03, PNorm = 155.1970, GNorm = 0.0798, lr_0 = 1.0679e-04
Loss = 5.4640e-04, PNorm = 155.1987, GNorm = 0.0606, lr_0 = 1.0672e-04
Loss = 9.2443e-04, PNorm = 155.2012, GNorm = 0.1039, lr_0 = 1.0665e-04
Loss = 4.8591e-04, PNorm = 155.2020, GNorm = 0.0776, lr_0 = 1.0657e-04
Loss = 3.7124e-03, PNorm = 155.2034, GNorm = 0.0969, lr_0 = 1.0650e-04
Loss = 8.7058e-04, PNorm = 155.2059, GNorm = 0.0470, lr_0 = 1.0643e-04
Loss = 4.8428e-04, PNorm = 155.2075, GNorm = 0.0771, lr_0 = 1.0635e-04
Loss = 1.5388e-03, PNorm = 155.2086, GNorm = 0.1616, lr_0 = 1.0628e-04
Loss = 2.6864e-03, PNorm = 155.2096, GNorm = 0.0821, lr_0 = 1.0621e-04
Loss = 1.0708e-03, PNorm = 155.2099, GNorm = 0.0920, lr_0 = 1.0614e-04
Loss = 6.5695e-04, PNorm = 155.2108, GNorm = 0.2700, lr_0 = 1.0606e-04
Loss = 7.6337e-04, PNorm = 155.2131, GNorm = 0.0864, lr_0 = 1.0599e-04
Loss = 5.2582e-04, PNorm = 155.2142, GNorm = 0.0808, lr_0 = 1.0592e-04
Loss = 9.9109e-04, PNorm = 155.2148, GNorm = 0.0837, lr_0 = 1.0585e-04
Loss = 7.2760e-04, PNorm = 155.2157, GNorm = 0.1450, lr_0 = 1.0577e-04
Loss = 5.4299e-04, PNorm = 155.2182, GNorm = 0.1040, lr_0 = 1.0570e-04
Loss = 1.2258e-03, PNorm = 155.2186, GNorm = 0.0743, lr_0 = 1.0563e-04
Loss = 8.0162e-04, PNorm = 155.2178, GNorm = 0.0436, lr_0 = 1.0556e-04
Loss = 8.6948e-04, PNorm = 155.2185, GNorm = 0.1154, lr_0 = 1.0548e-04
Loss = 1.0382e-03, PNorm = 155.2200, GNorm = 0.0603, lr_0 = 1.0541e-04
Loss = 1.3005e-03, PNorm = 155.2216, GNorm = 0.0304, lr_0 = 1.0534e-04
Loss = 4.1647e-04, PNorm = 155.2226, GNorm = 0.1273, lr_0 = 1.0527e-04
Loss = 1.3694e-03, PNorm = 155.2230, GNorm = 0.0284, lr_0 = 1.0519e-04
Loss = 2.3125e-03, PNorm = 155.2227, GNorm = 0.1457, lr_0 = 1.0512e-04
Loss = 6.1246e-04, PNorm = 155.2239, GNorm = 0.0371, lr_0 = 1.0505e-04
Loss = 7.5575e-04, PNorm = 155.2240, GNorm = 0.1126, lr_0 = 1.0498e-04
Loss = 1.5948e-03, PNorm = 155.2256, GNorm = 0.3330, lr_0 = 1.0491e-04
Loss = 8.5218e-04, PNorm = 155.2273, GNorm = 0.0808, lr_0 = 1.0483e-04
Loss = 1.2718e-03, PNorm = 155.2277, GNorm = 0.0312, lr_0 = 1.0476e-04
Loss = 1.1568e-03, PNorm = 155.2284, GNorm = 0.0425, lr_0 = 1.0469e-04
Loss = 8.2960e-04, PNorm = 155.2289, GNorm = 0.1275, lr_0 = 1.0462e-04
Loss = 2.0869e-03, PNorm = 155.2288, GNorm = 0.1408, lr_0 = 1.0455e-04
Loss = 1.9958e-03, PNorm = 155.2318, GNorm = 0.0516, lr_0 = 1.0448e-04
Loss = 5.5394e-04, PNorm = 155.2340, GNorm = 0.0877, lr_0 = 1.0440e-04
Loss = 6.4636e-04, PNorm = 155.2357, GNorm = 0.0823, lr_0 = 1.0433e-04
Loss = 6.8559e-04, PNorm = 155.2373, GNorm = 0.0259, lr_0 = 1.0426e-04
Loss = 1.9806e-03, PNorm = 155.2371, GNorm = 0.0230, lr_0 = 1.0419e-04
Loss = 1.5350e-03, PNorm = 155.2379, GNorm = 0.1122, lr_0 = 1.0412e-04
Loss = 8.1350e-04, PNorm = 155.2393, GNorm = 0.0444, lr_0 = 1.0405e-04
Loss = 1.6136e-03, PNorm = 155.2405, GNorm = 0.1025, lr_0 = 1.0398e-04
Loss = 1.0270e-03, PNorm = 155.2407, GNorm = 0.0899, lr_0 = 1.0391e-04
Loss = 1.1720e-03, PNorm = 155.2413, GNorm = 0.1305, lr_0 = 1.0383e-04
Loss = 5.0058e-04, PNorm = 155.2410, GNorm = 0.0271, lr_0 = 1.0376e-04
Loss = 9.9480e-04, PNorm = 155.2434, GNorm = 0.2201, lr_0 = 1.0369e-04
Loss = 1.6151e-03, PNorm = 155.2441, GNorm = 0.0980, lr_0 = 1.0362e-04
Loss = 4.1894e-04, PNorm = 155.2461, GNorm = 0.0344, lr_0 = 1.0355e-04
Loss = 4.2057e-04, PNorm = 155.2468, GNorm = 0.1034, lr_0 = 1.0348e-04
Loss = 8.6084e-04, PNorm = 155.2491, GNorm = 0.0430, lr_0 = 1.0341e-04
Loss = 5.2002e-04, PNorm = 155.2499, GNorm = 0.0615, lr_0 = 1.0334e-04
Loss = 1.1706e-03, PNorm = 155.2504, GNorm = 0.1044, lr_0 = 1.0327e-04
Loss = 2.2196e-03, PNorm = 155.2508, GNorm = 0.1056, lr_0 = 1.0320e-04
Loss = 1.4589e-03, PNorm = 155.2514, GNorm = 0.0933, lr_0 = 1.0312e-04
Loss = 4.8812e-04, PNorm = 155.2526, GNorm = 0.1843, lr_0 = 1.0305e-04
Loss = 4.5768e-04, PNorm = 155.2532, GNorm = 0.0388, lr_0 = 1.0298e-04
Loss = 2.5917e-03, PNorm = 155.2535, GNorm = 0.0464, lr_0 = 1.0291e-04
Loss = 6.3367e-04, PNorm = 155.2554, GNorm = 0.0299, lr_0 = 1.0284e-04
Loss = 8.0448e-04, PNorm = 155.2566, GNorm = 0.0504, lr_0 = 1.0277e-04
Loss = 8.2798e-04, PNorm = 155.2579, GNorm = 0.1190, lr_0 = 1.0270e-04
Loss = 1.0552e-03, PNorm = 155.2588, GNorm = 0.0739, lr_0 = 1.0263e-04
Loss = 2.3304e-03, PNorm = 155.2595, GNorm = 0.1141, lr_0 = 1.0256e-04
Loss = 1.0718e-03, PNorm = 155.2613, GNorm = 0.0412, lr_0 = 1.0249e-04
Loss = 4.1995e-04, PNorm = 155.2613, GNorm = 0.1133, lr_0 = 1.0242e-04
Loss = 7.0203e-04, PNorm = 155.2609, GNorm = 0.0292, lr_0 = 1.0235e-04
Loss = 1.7401e-03, PNorm = 155.2613, GNorm = 0.1246, lr_0 = 1.0228e-04
Loss = 1.4180e-03, PNorm = 155.2621, GNorm = 0.0759, lr_0 = 1.0221e-04
Loss = 9.9933e-04, PNorm = 155.2635, GNorm = 0.1125, lr_0 = 1.0214e-04
Loss = 3.9555e-04, PNorm = 155.2644, GNorm = 0.0359, lr_0 = 1.0207e-04
Loss = 1.5011e-03, PNorm = 155.2659, GNorm = 0.3144, lr_0 = 1.0200e-04
Loss = 1.4420e-03, PNorm = 155.2676, GNorm = 0.0589, lr_0 = 1.0193e-04
Loss = 1.1629e-03, PNorm = 155.2674, GNorm = 0.1422, lr_0 = 1.0186e-04
Loss = 9.1724e-04, PNorm = 155.2695, GNorm = 0.0224, lr_0 = 1.0179e-04
Loss = 1.6053e-03, PNorm = 155.2719, GNorm = 0.0495, lr_0 = 1.0172e-04
Loss = 3.7857e-03, PNorm = 155.2726, GNorm = 0.0624, lr_0 = 1.0165e-04
Loss = 2.1967e-03, PNorm = 155.2735, GNorm = 0.0675, lr_0 = 1.0158e-04
Loss = 7.5629e-04, PNorm = 155.2743, GNorm = 0.0252, lr_0 = 1.0151e-04
Loss = 7.9812e-04, PNorm = 155.2758, GNorm = 0.0395, lr_0 = 1.0144e-04
Loss = 9.6807e-04, PNorm = 155.2771, GNorm = 0.0591, lr_0 = 1.0137e-04
Loss = 5.9090e-04, PNorm = 155.2789, GNorm = 0.0803, lr_0 = 1.0130e-04
Loss = 1.6477e-03, PNorm = 155.2801, GNorm = 0.0274, lr_0 = 1.0123e-04
Loss = 5.4790e-04, PNorm = 155.2838, GNorm = 0.1201, lr_0 = 1.0116e-04
Loss = 1.3612e-03, PNorm = 155.2841, GNorm = 0.0878, lr_0 = 1.0110e-04
Loss = 6.6362e-04, PNorm = 155.2856, GNorm = 0.0426, lr_0 = 1.0103e-04
Loss = 4.1186e-04, PNorm = 155.2862, GNorm = 0.1146, lr_0 = 1.0096e-04
Loss = 8.8187e-04, PNorm = 155.2864, GNorm = 0.0817, lr_0 = 1.0089e-04
Loss = 4.8313e-04, PNorm = 155.2882, GNorm = 0.0296, lr_0 = 1.0082e-04
Loss = 4.5974e-04, PNorm = 155.2885, GNorm = 0.0639, lr_0 = 1.0075e-04
Loss = 5.1500e-04, PNorm = 155.2894, GNorm = 0.0504, lr_0 = 1.0068e-04
Loss = 3.4536e-04, PNorm = 155.2899, GNorm = 0.0751, lr_0 = 1.0061e-04
Loss = 7.4582e-04, PNorm = 155.2904, GNorm = 0.2789, lr_0 = 1.0054e-04
Loss = 7.2601e-04, PNorm = 155.2921, GNorm = 0.0516, lr_0 = 1.0047e-04
Loss = 7.7947e-04, PNorm = 155.2929, GNorm = 0.0272, lr_0 = 1.0041e-04
Loss = 1.2403e-03, PNorm = 155.2930, GNorm = 0.0973, lr_0 = 1.0034e-04
Loss = 1.1443e-03, PNorm = 155.2933, GNorm = 0.1227, lr_0 = 1.0027e-04
Loss = 5.8757e-04, PNorm = 155.2950, GNorm = 0.0667, lr_0 = 1.0020e-04
Loss = 2.1060e-03, PNorm = 155.2969, GNorm = 0.0671, lr_0 = 1.0013e-04
Loss = 1.0157e-03, PNorm = 155.2974, GNorm = 0.1236, lr_0 = 1.0006e-04
Loss = 2.5073e-03, PNorm = 155.2999, GNorm = 0.1161, lr_0 = 1.0000e-04
Validation mae = 0.474861
Model 0 best validation mae = 0.474335 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.457525
Ensemble test mae = 0.457525
Fold 9
Splitting data with seed 9
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=1900, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=1900, out_features=1900, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=1900, out_features=1, bias=True)
  )
)
Number of parameters = 7,506,901
Moving model to cuda
Epoch 0
Loss = 9.4093e-01, PNorm = 62.2445, GNorm = 1.6787, lr_0 = 1.0413e-04
Loss = 7.2165e-01, PNorm = 62.2561, GNorm = 1.9406, lr_0 = 1.0788e-04
Loss = 5.7044e-01, PNorm = 62.2688, GNorm = 1.5388, lr_0 = 1.1163e-04
Loss = 5.3106e-01, PNorm = 62.2809, GNorm = 1.3548, lr_0 = 1.1537e-04
Loss = 4.1436e-01, PNorm = 62.2906, GNorm = 1.8166, lr_0 = 1.1913e-04
Loss = 4.5932e-01, PNorm = 62.3002, GNorm = 2.4238, lr_0 = 1.2287e-04
Loss = 4.2904e-01, PNorm = 62.3096, GNorm = 1.6686, lr_0 = 1.2663e-04
Loss = 3.7766e-01, PNorm = 62.3195, GNorm = 1.4776, lr_0 = 1.3038e-04
Loss = 3.8385e-01, PNorm = 62.3295, GNorm = 1.9347, lr_0 = 1.3413e-04
Loss = 3.9351e-01, PNorm = 62.3388, GNorm = 3.7378, lr_0 = 1.3788e-04
Loss = 3.5680e-01, PNorm = 62.3490, GNorm = 1.9515, lr_0 = 1.4163e-04
Loss = 3.2979e-01, PNorm = 62.3570, GNorm = 1.8071, lr_0 = 1.4537e-04
Loss = 3.7534e-01, PNorm = 62.3673, GNorm = 2.3478, lr_0 = 1.4913e-04
Loss = 3.3104e-01, PNorm = 62.3779, GNorm = 2.7946, lr_0 = 1.5288e-04
Loss = 3.6511e-01, PNorm = 62.3880, GNorm = 2.1113, lr_0 = 1.5662e-04
Loss = 3.4067e-01, PNorm = 62.3975, GNorm = 3.1220, lr_0 = 1.6038e-04
Loss = 3.2573e-01, PNorm = 62.4069, GNorm = 3.6204, lr_0 = 1.6412e-04
Loss = 3.4288e-01, PNorm = 62.4201, GNorm = 2.8927, lr_0 = 1.6788e-04
Loss = 3.7209e-01, PNorm = 62.4323, GNorm = 1.8975, lr_0 = 1.7163e-04
Loss = 3.1041e-01, PNorm = 62.4433, GNorm = 2.8927, lr_0 = 1.7538e-04
Loss = 3.1990e-01, PNorm = 62.4565, GNorm = 1.5570, lr_0 = 1.7913e-04
Loss = 3.2784e-01, PNorm = 62.4689, GNorm = 1.6360, lr_0 = 1.8288e-04
Loss = 2.8065e-01, PNorm = 62.4802, GNorm = 1.5400, lr_0 = 1.8662e-04
Loss = 2.9072e-01, PNorm = 62.4909, GNorm = 1.1489, lr_0 = 1.9038e-04
Loss = 2.8813e-01, PNorm = 62.5032, GNorm = 1.4559, lr_0 = 1.9413e-04
Loss = 3.0110e-01, PNorm = 62.5165, GNorm = 1.3317, lr_0 = 1.9788e-04
Loss = 2.9548e-01, PNorm = 62.5294, GNorm = 1.5136, lr_0 = 2.0163e-04
Loss = 2.7558e-01, PNorm = 62.5426, GNorm = 1.5318, lr_0 = 2.0537e-04
Loss = 2.9571e-01, PNorm = 62.5579, GNorm = 1.6175, lr_0 = 2.0913e-04
Loss = 2.8464e-01, PNorm = 62.5691, GNorm = 2.4918, lr_0 = 2.1288e-04
Loss = 2.6307e-01, PNorm = 62.5821, GNorm = 1.9821, lr_0 = 2.1663e-04
Loss = 2.9257e-01, PNorm = 62.5981, GNorm = 2.9666, lr_0 = 2.2038e-04
Loss = 2.8374e-01, PNorm = 62.6156, GNorm = 1.7863, lr_0 = 2.2412e-04
Loss = 2.4586e-01, PNorm = 62.6327, GNorm = 1.4003, lr_0 = 2.2787e-04
Loss = 3.2218e-01, PNorm = 62.6484, GNorm = 1.7862, lr_0 = 2.3163e-04
Loss = 2.3956e-01, PNorm = 62.6675, GNorm = 1.7742, lr_0 = 2.3538e-04
Loss = 2.5789e-01, PNorm = 62.6848, GNorm = 1.5499, lr_0 = 2.3913e-04
Loss = 2.8413e-01, PNorm = 62.7029, GNorm = 1.1133, lr_0 = 2.4288e-04
Loss = 2.3782e-01, PNorm = 62.7218, GNorm = 1.3988, lr_0 = 2.4662e-04
Loss = 2.7783e-01, PNorm = 62.7402, GNorm = 2.5174, lr_0 = 2.5038e-04
Loss = 2.4861e-01, PNorm = 62.7594, GNorm = 1.2406, lr_0 = 2.5413e-04
Loss = 3.0085e-01, PNorm = 62.7750, GNorm = 1.4884, lr_0 = 2.5788e-04
Loss = 2.5371e-01, PNorm = 62.7966, GNorm = 1.3440, lr_0 = 2.6163e-04
Loss = 2.5131e-01, PNorm = 62.8167, GNorm = 1.8547, lr_0 = 2.6537e-04
Loss = 2.8599e-01, PNorm = 62.8336, GNorm = 2.2973, lr_0 = 2.6912e-04
Loss = 2.4962e-01, PNorm = 62.8571, GNorm = 1.2804, lr_0 = 2.7288e-04
Loss = 2.5261e-01, PNorm = 62.8774, GNorm = 1.1732, lr_0 = 2.7663e-04
Loss = 2.8715e-01, PNorm = 62.8978, GNorm = 1.2035, lr_0 = 2.8038e-04
Loss = 2.9105e-01, PNorm = 62.9231, GNorm = 1.1103, lr_0 = 2.8413e-04
Loss = 2.6610e-01, PNorm = 62.9472, GNorm = 1.3978, lr_0 = 2.8787e-04
Loss = 2.1969e-01, PNorm = 62.9731, GNorm = 1.3079, lr_0 = 2.9163e-04
Loss = 2.4431e-01, PNorm = 62.9948, GNorm = 1.6630, lr_0 = 2.9538e-04
Loss = 2.3426e-01, PNorm = 63.0154, GNorm = 0.9879, lr_0 = 2.9913e-04
Loss = 2.5031e-01, PNorm = 63.0403, GNorm = 1.6952, lr_0 = 3.0288e-04
Loss = 2.6486e-01, PNorm = 63.0638, GNorm = 1.2460, lr_0 = 3.0662e-04
Loss = 2.3664e-01, PNorm = 63.0878, GNorm = 1.1460, lr_0 = 3.1037e-04
Loss = 2.1653e-01, PNorm = 63.1118, GNorm = 1.3317, lr_0 = 3.1413e-04
Loss = 2.7421e-01, PNorm = 63.1310, GNorm = 1.3894, lr_0 = 3.1788e-04
Loss = 2.4534e-01, PNorm = 63.1555, GNorm = 1.0432, lr_0 = 3.2163e-04
Loss = 2.5035e-01, PNorm = 63.1841, GNorm = 1.4509, lr_0 = 3.2538e-04
Loss = 2.5829e-01, PNorm = 63.2103, GNorm = 1.9151, lr_0 = 3.2912e-04
Loss = 2.7628e-01, PNorm = 63.2357, GNorm = 1.5689, lr_0 = 3.3288e-04
Loss = 2.4277e-01, PNorm = 63.2645, GNorm = 1.2023, lr_0 = 3.3663e-04
Loss = 2.3104e-01, PNorm = 63.2922, GNorm = 1.0495, lr_0 = 3.4038e-04
Loss = 2.6876e-01, PNorm = 63.3221, GNorm = 0.9292, lr_0 = 3.4413e-04
Loss = 2.5063e-01, PNorm = 63.3539, GNorm = 1.0695, lr_0 = 3.4787e-04
Loss = 2.1876e-01, PNorm = 63.3846, GNorm = 1.4257, lr_0 = 3.5162e-04
Loss = 2.5157e-01, PNorm = 63.4146, GNorm = 2.2294, lr_0 = 3.5538e-04
Loss = 2.4431e-01, PNorm = 63.4492, GNorm = 1.2283, lr_0 = 3.5913e-04
Loss = 2.7334e-01, PNorm = 63.4814, GNorm = 1.6497, lr_0 = 3.6288e-04
Loss = 2.4638e-01, PNorm = 63.5137, GNorm = 1.4850, lr_0 = 3.6662e-04
Loss = 2.2548e-01, PNorm = 63.5428, GNorm = 1.1500, lr_0 = 3.7037e-04
Loss = 2.3396e-01, PNorm = 63.5775, GNorm = 1.0807, lr_0 = 3.7413e-04
Loss = 2.5798e-01, PNorm = 63.6054, GNorm = 1.0515, lr_0 = 3.7788e-04
Loss = 2.5152e-01, PNorm = 63.6405, GNorm = 1.4821, lr_0 = 3.8163e-04
Loss = 2.5597e-01, PNorm = 63.6762, GNorm = 1.0857, lr_0 = 3.8537e-04
Loss = 2.6309e-01, PNorm = 63.7139, GNorm = 1.3075, lr_0 = 3.8912e-04
Loss = 2.4090e-01, PNorm = 63.7498, GNorm = 1.3351, lr_0 = 3.9287e-04
Loss = 2.5015e-01, PNorm = 63.7834, GNorm = 1.7197, lr_0 = 3.9663e-04
Loss = 2.3005e-01, PNorm = 63.8160, GNorm = 1.2798, lr_0 = 4.0038e-04
Loss = 2.4025e-01, PNorm = 63.8569, GNorm = 0.9257, lr_0 = 4.0413e-04
Loss = 2.4410e-01, PNorm = 63.8924, GNorm = 1.0943, lr_0 = 4.0787e-04
Loss = 2.4158e-01, PNorm = 63.9301, GNorm = 1.1504, lr_0 = 4.1162e-04
Loss = 2.2675e-01, PNorm = 63.9750, GNorm = 1.1026, lr_0 = 4.1537e-04
Loss = 2.6525e-01, PNorm = 64.0114, GNorm = 1.2381, lr_0 = 4.1913e-04
Loss = 2.1241e-01, PNorm = 64.0452, GNorm = 0.9523, lr_0 = 4.2288e-04
Loss = 2.2464e-01, PNorm = 64.0840, GNorm = 1.1962, lr_0 = 4.2662e-04
Loss = 2.5006e-01, PNorm = 64.1178, GNorm = 0.7012, lr_0 = 4.3037e-04
Loss = 2.2356e-01, PNorm = 64.1619, GNorm = 0.6744, lr_0 = 4.3412e-04
Loss = 1.9640e-01, PNorm = 64.2021, GNorm = 1.1253, lr_0 = 4.3788e-04
Loss = 2.3937e-01, PNorm = 64.2380, GNorm = 1.8719, lr_0 = 4.4163e-04
Loss = 2.9948e-01, PNorm = 64.2794, GNorm = 1.0148, lr_0 = 4.4538e-04
Loss = 2.7138e-01, PNorm = 64.3305, GNorm = 1.3231, lr_0 = 4.4912e-04
Loss = 1.9442e-01, PNorm = 64.3768, GNorm = 1.0100, lr_0 = 4.5287e-04
Loss = 2.5511e-01, PNorm = 64.4260, GNorm = 0.9323, lr_0 = 4.5662e-04
Loss = 2.3558e-01, PNorm = 64.4735, GNorm = 1.6695, lr_0 = 4.6038e-04
Loss = 2.1067e-01, PNorm = 64.5224, GNorm = 1.3837, lr_0 = 4.6413e-04
Loss = 2.1027e-01, PNorm = 64.5711, GNorm = 1.0402, lr_0 = 4.6787e-04
Loss = 2.0665e-01, PNorm = 64.6115, GNorm = 1.1050, lr_0 = 4.7162e-04
Loss = 2.3513e-01, PNorm = 64.6629, GNorm = 1.3366, lr_0 = 4.7537e-04
Loss = 2.3553e-01, PNorm = 64.7045, GNorm = 1.4109, lr_0 = 4.7913e-04
Loss = 1.8854e-01, PNorm = 64.7540, GNorm = 0.9039, lr_0 = 4.8288e-04
Loss = 2.2059e-01, PNorm = 64.7984, GNorm = 1.3502, lr_0 = 4.8663e-04
Loss = 2.3717e-01, PNorm = 64.8464, GNorm = 0.8651, lr_0 = 4.9038e-04
Loss = 2.1111e-01, PNorm = 64.8956, GNorm = 1.0763, lr_0 = 4.9412e-04
Loss = 2.3781e-01, PNorm = 64.9424, GNorm = 1.3582, lr_0 = 4.9788e-04
Loss = 2.2497e-01, PNorm = 64.9947, GNorm = 1.1108, lr_0 = 5.0163e-04
Loss = 2.2647e-01, PNorm = 65.0457, GNorm = 0.9701, lr_0 = 5.0538e-04
Loss = 2.3777e-01, PNorm = 65.0965, GNorm = 1.0927, lr_0 = 5.0913e-04
Loss = 2.3639e-01, PNorm = 65.1510, GNorm = 0.8045, lr_0 = 5.1287e-04
Loss = 2.2648e-01, PNorm = 65.2079, GNorm = 0.8670, lr_0 = 5.1663e-04
Loss = 2.0330e-01, PNorm = 65.2593, GNorm = 1.4146, lr_0 = 5.2038e-04
Loss = 2.1821e-01, PNorm = 65.3088, GNorm = 1.0219, lr_0 = 5.2413e-04
Loss = 2.0345e-01, PNorm = 65.3634, GNorm = 0.8828, lr_0 = 5.2788e-04
Loss = 2.4044e-01, PNorm = 65.4123, GNorm = 1.2781, lr_0 = 5.3162e-04
Loss = 2.5148e-01, PNorm = 65.4670, GNorm = 1.0385, lr_0 = 5.3538e-04
Loss = 1.7025e-01, PNorm = 65.5249, GNorm = 1.0379, lr_0 = 5.3912e-04
Loss = 2.0436e-01, PNorm = 65.5756, GNorm = 1.2546, lr_0 = 5.4288e-04
Loss = 2.0273e-01, PNorm = 65.6268, GNorm = 0.7365, lr_0 = 5.4663e-04
Loss = 2.2221e-01, PNorm = 65.6876, GNorm = 1.0492, lr_0 = 5.5038e-04
Validation mae = 0.559676
Epoch 1
Loss = 1.5199e-01, PNorm = 65.7393, GNorm = 0.9075, lr_0 = 5.5413e-04
Loss = 1.6336e-01, PNorm = 65.7959, GNorm = 1.0929, lr_0 = 5.5787e-04
Loss = 1.4499e-01, PNorm = 65.8472, GNorm = 0.8463, lr_0 = 5.6163e-04
Loss = 1.3854e-01, PNorm = 65.8973, GNorm = 0.7333, lr_0 = 5.6538e-04
Loss = 1.6027e-01, PNorm = 65.9523, GNorm = 0.8764, lr_0 = 5.6913e-04
Loss = 1.4442e-01, PNorm = 65.9971, GNorm = 0.7370, lr_0 = 5.7288e-04
Loss = 1.4943e-01, PNorm = 66.0504, GNorm = 0.9895, lr_0 = 5.7662e-04
Loss = 1.5089e-01, PNorm = 66.0992, GNorm = 0.8077, lr_0 = 5.8038e-04
Loss = 1.3518e-01, PNorm = 66.1505, GNorm = 0.7890, lr_0 = 5.8413e-04
Loss = 1.4820e-01, PNorm = 66.2018, GNorm = 0.6551, lr_0 = 5.8788e-04
Loss = 1.5108e-01, PNorm = 66.2564, GNorm = 0.7521, lr_0 = 5.9163e-04
Loss = 1.4072e-01, PNorm = 66.3137, GNorm = 0.9290, lr_0 = 5.9538e-04
Loss = 1.5191e-01, PNorm = 66.3665, GNorm = 0.7819, lr_0 = 5.9913e-04
Loss = 1.5739e-01, PNorm = 66.4292, GNorm = 0.8553, lr_0 = 6.0288e-04
Loss = 1.5027e-01, PNorm = 66.4895, GNorm = 0.8015, lr_0 = 6.0663e-04
Loss = 1.5483e-01, PNorm = 66.5519, GNorm = 0.7973, lr_0 = 6.1038e-04
Loss = 1.5835e-01, PNorm = 66.6034, GNorm = 0.8496, lr_0 = 6.1413e-04
Loss = 1.6732e-01, PNorm = 66.6652, GNorm = 0.9594, lr_0 = 6.1788e-04
Loss = 1.5995e-01, PNorm = 66.7281, GNorm = 0.8464, lr_0 = 6.2163e-04
Loss = 1.6350e-01, PNorm = 66.8032, GNorm = 0.6690, lr_0 = 6.2538e-04
Loss = 1.3283e-01, PNorm = 66.8764, GNorm = 0.7592, lr_0 = 6.2913e-04
Loss = 1.6333e-01, PNorm = 66.9415, GNorm = 0.8320, lr_0 = 6.3288e-04
Loss = 1.5493e-01, PNorm = 67.0144, GNorm = 0.8507, lr_0 = 6.3663e-04
Loss = 1.5436e-01, PNorm = 67.0793, GNorm = 0.5815, lr_0 = 6.4038e-04
Loss = 1.4301e-01, PNorm = 67.1460, GNorm = 0.7885, lr_0 = 6.4413e-04
Loss = 1.5954e-01, PNorm = 67.2164, GNorm = 1.0504, lr_0 = 6.4788e-04
Loss = 1.4749e-01, PNorm = 67.2840, GNorm = 0.8007, lr_0 = 6.5163e-04
Loss = 1.5183e-01, PNorm = 67.3570, GNorm = 0.9072, lr_0 = 6.5538e-04
Loss = 1.6238e-01, PNorm = 67.4358, GNorm = 0.8002, lr_0 = 6.5913e-04
Loss = 1.5023e-01, PNorm = 67.5100, GNorm = 0.7656, lr_0 = 6.6288e-04
Loss = 1.7541e-01, PNorm = 67.5948, GNorm = 0.7182, lr_0 = 6.6663e-04
Loss = 1.5816e-01, PNorm = 67.6696, GNorm = 0.7625, lr_0 = 6.7038e-04
Loss = 1.4695e-01, PNorm = 67.7517, GNorm = 0.6404, lr_0 = 6.7413e-04
Loss = 1.3902e-01, PNorm = 67.8358, GNorm = 1.0677, lr_0 = 6.7788e-04
Loss = 1.4386e-01, PNorm = 67.9152, GNorm = 0.7764, lr_0 = 6.8163e-04
Loss = 1.7376e-01, PNorm = 67.9859, GNorm = 0.7024, lr_0 = 6.8538e-04
Loss = 1.8661e-01, PNorm = 68.0702, GNorm = 0.7656, lr_0 = 6.8913e-04
Loss = 1.6323e-01, PNorm = 68.1534, GNorm = 0.9482, lr_0 = 6.9288e-04
Loss = 1.9272e-01, PNorm = 68.2502, GNorm = 0.7506, lr_0 = 6.9663e-04
Loss = 1.5432e-01, PNorm = 68.3404, GNorm = 0.7730, lr_0 = 7.0038e-04
Loss = 1.4789e-01, PNorm = 68.4357, GNorm = 0.8033, lr_0 = 7.0413e-04
Loss = 1.9096e-01, PNorm = 68.5299, GNorm = 0.7978, lr_0 = 7.0788e-04
Loss = 1.6459e-01, PNorm = 68.6329, GNorm = 0.8430, lr_0 = 7.1163e-04
Loss = 1.5531e-01, PNorm = 68.7259, GNorm = 0.7861, lr_0 = 7.1538e-04
Loss = 1.4255e-01, PNorm = 68.8130, GNorm = 0.7904, lr_0 = 7.1913e-04
Loss = 1.6723e-01, PNorm = 68.8979, GNorm = 0.9925, lr_0 = 7.2288e-04
Loss = 1.7760e-01, PNorm = 68.9868, GNorm = 1.1911, lr_0 = 7.2663e-04
Loss = 1.5965e-01, PNorm = 69.0772, GNorm = 0.8365, lr_0 = 7.3038e-04
Loss = 1.9296e-01, PNorm = 69.1679, GNorm = 0.7691, lr_0 = 7.3413e-04
Loss = 1.7575e-01, PNorm = 69.2649, GNorm = 0.8142, lr_0 = 7.3788e-04
Loss = 1.4745e-01, PNorm = 69.3497, GNorm = 0.7997, lr_0 = 7.4163e-04
Loss = 1.6594e-01, PNorm = 69.4342, GNorm = 1.0239, lr_0 = 7.4538e-04
Loss = 1.7987e-01, PNorm = 69.5249, GNorm = 1.2561, lr_0 = 7.4913e-04
Loss = 1.4684e-01, PNorm = 69.6098, GNorm = 0.8751, lr_0 = 7.5288e-04
Loss = 1.4057e-01, PNorm = 69.7036, GNorm = 0.8491, lr_0 = 7.5663e-04
Loss = 1.7365e-01, PNorm = 69.7875, GNorm = 1.2397, lr_0 = 7.6038e-04
Loss = 1.7232e-01, PNorm = 69.8887, GNorm = 1.0256, lr_0 = 7.6413e-04
Loss = 1.6986e-01, PNorm = 69.9821, GNorm = 0.7333, lr_0 = 7.6788e-04
Loss = 1.6413e-01, PNorm = 70.0927, GNorm = 0.6745, lr_0 = 7.7163e-04
Loss = 1.6101e-01, PNorm = 70.1787, GNorm = 0.7510, lr_0 = 7.7538e-04
Loss = 1.7689e-01, PNorm = 70.2815, GNorm = 0.9899, lr_0 = 7.7913e-04
Loss = 1.6882e-01, PNorm = 70.3800, GNorm = 0.6915, lr_0 = 7.8288e-04
Loss = 1.5152e-01, PNorm = 70.4829, GNorm = 0.6929, lr_0 = 7.8663e-04
Loss = 1.8025e-01, PNorm = 70.5836, GNorm = 0.7203, lr_0 = 7.9038e-04
Loss = 1.6018e-01, PNorm = 70.6872, GNorm = 0.5933, lr_0 = 7.9413e-04
Loss = 1.5448e-01, PNorm = 70.7857, GNorm = 0.7714, lr_0 = 7.9788e-04
Loss = 1.6424e-01, PNorm = 70.8889, GNorm = 0.9703, lr_0 = 8.0163e-04
Loss = 1.4840e-01, PNorm = 70.9836, GNorm = 0.7215, lr_0 = 8.0538e-04
Loss = 1.7261e-01, PNorm = 71.0927, GNorm = 0.8706, lr_0 = 8.0913e-04
Loss = 1.7602e-01, PNorm = 71.1987, GNorm = 0.9168, lr_0 = 8.1288e-04
Loss = 1.7770e-01, PNorm = 71.3124, GNorm = 1.2902, lr_0 = 8.1663e-04
Loss = 1.5915e-01, PNorm = 71.4196, GNorm = 0.8536, lr_0 = 8.2038e-04
Loss = 1.6946e-01, PNorm = 71.5280, GNorm = 0.6874, lr_0 = 8.2413e-04
Loss = 1.6800e-01, PNorm = 71.6402, GNorm = 0.8145, lr_0 = 8.2788e-04
Loss = 1.7845e-01, PNorm = 71.7637, GNorm = 0.6942, lr_0 = 8.3163e-04
Loss = 1.9161e-01, PNorm = 71.8814, GNorm = 0.9643, lr_0 = 8.3538e-04
Loss = 1.8002e-01, PNorm = 72.0139, GNorm = 0.7326, lr_0 = 8.3913e-04
Loss = 1.7623e-01, PNorm = 72.1365, GNorm = 0.9166, lr_0 = 8.4288e-04
Loss = 1.7398e-01, PNorm = 72.2705, GNorm = 1.1860, lr_0 = 8.4663e-04
Loss = 1.6795e-01, PNorm = 72.3912, GNorm = 0.9725, lr_0 = 8.5038e-04
Loss = 1.8087e-01, PNorm = 72.5234, GNorm = 0.8956, lr_0 = 8.5413e-04
Loss = 1.8506e-01, PNorm = 72.6549, GNorm = 0.6933, lr_0 = 8.5788e-04
Loss = 1.6917e-01, PNorm = 72.7734, GNorm = 0.5145, lr_0 = 8.6163e-04
Loss = 1.8918e-01, PNorm = 72.8970, GNorm = 0.9456, lr_0 = 8.6538e-04
Loss = 1.4956e-01, PNorm = 73.0196, GNorm = 0.7823, lr_0 = 8.6913e-04
Loss = 1.8141e-01, PNorm = 73.1226, GNorm = 1.1253, lr_0 = 8.7288e-04
Loss = 1.7144e-01, PNorm = 73.2425, GNorm = 1.2485, lr_0 = 8.7663e-04
Loss = 1.8614e-01, PNorm = 73.3656, GNorm = 0.7719, lr_0 = 8.8038e-04
Loss = 1.4972e-01, PNorm = 73.4867, GNorm = 0.5659, lr_0 = 8.8413e-04
Loss = 1.7236e-01, PNorm = 73.5970, GNorm = 1.0384, lr_0 = 8.8788e-04
Loss = 1.7627e-01, PNorm = 73.7155, GNorm = 0.8865, lr_0 = 8.9163e-04
Loss = 1.7941e-01, PNorm = 73.8395, GNorm = 1.0355, lr_0 = 8.9538e-04
Loss = 1.7222e-01, PNorm = 73.9554, GNorm = 0.7881, lr_0 = 8.9913e-04
Loss = 1.8655e-01, PNorm = 74.0941, GNorm = 1.2487, lr_0 = 9.0288e-04
Loss = 1.8965e-01, PNorm = 74.2248, GNorm = 0.6012, lr_0 = 9.0663e-04
Loss = 1.9028e-01, PNorm = 74.3729, GNorm = 1.5529, lr_0 = 9.1038e-04
Loss = 1.8866e-01, PNorm = 74.5058, GNorm = 0.9135, lr_0 = 9.1413e-04
Loss = 1.5805e-01, PNorm = 74.6262, GNorm = 0.7147, lr_0 = 9.1788e-04
Loss = 1.7538e-01, PNorm = 74.7412, GNorm = 0.7034, lr_0 = 9.2163e-04
Loss = 1.6396e-01, PNorm = 74.8673, GNorm = 0.7487, lr_0 = 9.2538e-04
Loss = 1.7379e-01, PNorm = 74.9758, GNorm = 0.7817, lr_0 = 9.2913e-04
Loss = 1.8642e-01, PNorm = 75.0950, GNorm = 1.0801, lr_0 = 9.3288e-04
Loss = 1.6436e-01, PNorm = 75.2121, GNorm = 0.7593, lr_0 = 9.3663e-04
Loss = 1.6531e-01, PNorm = 75.3377, GNorm = 0.4935, lr_0 = 9.4038e-04
Loss = 1.7445e-01, PNorm = 75.4576, GNorm = 0.6988, lr_0 = 9.4413e-04
Loss = 1.7129e-01, PNorm = 75.5756, GNorm = 0.8817, lr_0 = 9.4788e-04
Loss = 1.9534e-01, PNorm = 75.7034, GNorm = 1.0791, lr_0 = 9.5163e-04
Loss = 1.4503e-01, PNorm = 75.8384, GNorm = 0.6519, lr_0 = 9.5538e-04
Loss = 1.9041e-01, PNorm = 75.9616, GNorm = 0.9141, lr_0 = 9.5913e-04
Loss = 1.6896e-01, PNorm = 76.1040, GNorm = 0.6778, lr_0 = 9.6288e-04
Loss = 1.6753e-01, PNorm = 76.2299, GNorm = 0.7465, lr_0 = 9.6663e-04
Loss = 1.6739e-01, PNorm = 76.3456, GNorm = 0.6560, lr_0 = 9.7038e-04
Loss = 1.7988e-01, PNorm = 76.4766, GNorm = 0.6124, lr_0 = 9.7413e-04
Loss = 1.8065e-01, PNorm = 76.6104, GNorm = 0.5960, lr_0 = 9.7788e-04
Loss = 1.7932e-01, PNorm = 76.7539, GNorm = 0.6425, lr_0 = 9.8163e-04
Loss = 1.6944e-01, PNorm = 76.8929, GNorm = 1.0942, lr_0 = 9.8537e-04
Loss = 1.7005e-01, PNorm = 77.0411, GNorm = 0.7892, lr_0 = 9.8912e-04
Loss = 1.7971e-01, PNorm = 77.1835, GNorm = 0.6861, lr_0 = 9.9288e-04
Loss = 1.6788e-01, PNorm = 77.3315, GNorm = 1.0088, lr_0 = 9.9663e-04
Loss = 1.8427e-01, PNorm = 77.4808, GNorm = 0.8443, lr_0 = 9.9993e-04
Validation mae = 0.534583
Epoch 2
Loss = 1.3360e-01, PNorm = 77.6097, GNorm = 0.6181, lr_0 = 9.9925e-04
Loss = 1.1176e-01, PNorm = 77.7237, GNorm = 0.6303, lr_0 = 9.9856e-04
Loss = 1.1971e-01, PNorm = 77.8221, GNorm = 0.7109, lr_0 = 9.9788e-04
Loss = 1.1750e-01, PNorm = 77.9352, GNorm = 0.7960, lr_0 = 9.9719e-04
Loss = 1.0060e-01, PNorm = 78.0261, GNorm = 0.7423, lr_0 = 9.9651e-04
Loss = 1.1753e-01, PNorm = 78.1324, GNorm = 0.7678, lr_0 = 9.9583e-04
Loss = 1.1290e-01, PNorm = 78.2423, GNorm = 0.6213, lr_0 = 9.9515e-04
Loss = 1.0197e-01, PNorm = 78.3445, GNorm = 0.5193, lr_0 = 9.9446e-04
Loss = 1.0065e-01, PNorm = 78.4290, GNorm = 0.4468, lr_0 = 9.9378e-04
Loss = 9.3079e-02, PNorm = 78.5277, GNorm = 0.6794, lr_0 = 9.9310e-04
Loss = 1.1954e-01, PNorm = 78.6139, GNorm = 0.6762, lr_0 = 9.9242e-04
Loss = 1.0398e-01, PNorm = 78.7168, GNorm = 0.7098, lr_0 = 9.9174e-04
Loss = 1.0219e-01, PNorm = 78.8202, GNorm = 0.4965, lr_0 = 9.9106e-04
Loss = 1.0535e-01, PNorm = 78.9172, GNorm = 0.4269, lr_0 = 9.9038e-04
Loss = 1.0934e-01, PNorm = 79.0159, GNorm = 0.6381, lr_0 = 9.8971e-04
Loss = 1.1529e-01, PNorm = 79.1233, GNorm = 0.9108, lr_0 = 9.8903e-04
Loss = 1.0328e-01, PNorm = 79.2394, GNorm = 0.7138, lr_0 = 9.8835e-04
Loss = 1.0199e-01, PNorm = 79.3523, GNorm = 0.6947, lr_0 = 9.8767e-04
Loss = 1.0297e-01, PNorm = 79.4523, GNorm = 0.6755, lr_0 = 9.8700e-04
Loss = 1.0250e-01, PNorm = 79.5680, GNorm = 0.8403, lr_0 = 9.8632e-04
Loss = 1.0281e-01, PNorm = 79.6712, GNorm = 0.3975, lr_0 = 9.8564e-04
Loss = 1.0302e-01, PNorm = 79.7730, GNorm = 0.4952, lr_0 = 9.8497e-04
Loss = 1.1919e-01, PNorm = 79.8771, GNorm = 0.5510, lr_0 = 9.8429e-04
Loss = 9.4213e-02, PNorm = 79.9662, GNorm = 0.5444, lr_0 = 9.8362e-04
Loss = 1.0724e-01, PNorm = 80.0708, GNorm = 1.3278, lr_0 = 9.8295e-04
Loss = 1.0585e-01, PNorm = 80.1627, GNorm = 0.5295, lr_0 = 9.8227e-04
Loss = 1.0394e-01, PNorm = 80.2576, GNorm = 0.4863, lr_0 = 9.8160e-04
Loss = 9.8911e-02, PNorm = 80.3594, GNorm = 0.5244, lr_0 = 9.8093e-04
Loss = 1.1381e-01, PNorm = 80.4521, GNorm = 0.4532, lr_0 = 9.8026e-04
Loss = 1.1656e-01, PNorm = 80.5658, GNorm = 0.4909, lr_0 = 9.7958e-04
Loss = 1.0687e-01, PNorm = 80.6710, GNorm = 0.5831, lr_0 = 9.7891e-04
Loss = 1.2721e-01, PNorm = 80.7757, GNorm = 1.1817, lr_0 = 9.7824e-04
Loss = 1.2013e-01, PNorm = 80.8844, GNorm = 0.5386, lr_0 = 9.7757e-04
Loss = 1.1779e-01, PNorm = 80.9966, GNorm = 0.5853, lr_0 = 9.7690e-04
Loss = 1.1859e-01, PNorm = 81.1134, GNorm = 0.5839, lr_0 = 9.7623e-04
Loss = 1.0149e-01, PNorm = 81.2155, GNorm = 0.8960, lr_0 = 9.7556e-04
Loss = 1.0745e-01, PNorm = 81.3160, GNorm = 0.4186, lr_0 = 9.7490e-04
Loss = 1.1405e-01, PNorm = 81.4223, GNorm = 0.7876, lr_0 = 9.7423e-04
Loss = 1.1627e-01, PNorm = 81.5358, GNorm = 0.9565, lr_0 = 9.7356e-04
Loss = 1.3832e-01, PNorm = 81.6348, GNorm = 0.6177, lr_0 = 9.7289e-04
Loss = 1.1291e-01, PNorm = 81.7532, GNorm = 0.5223, lr_0 = 9.7223e-04
Loss = 9.9829e-02, PNorm = 81.8587, GNorm = 0.4560, lr_0 = 9.7156e-04
Loss = 1.1030e-01, PNorm = 81.9649, GNorm = 0.4894, lr_0 = 9.7090e-04
Loss = 8.7389e-02, PNorm = 82.0512, GNorm = 0.7693, lr_0 = 9.7023e-04
Loss = 1.1698e-01, PNorm = 82.1483, GNorm = 0.5695, lr_0 = 9.6957e-04
Loss = 1.2189e-01, PNorm = 82.2528, GNorm = 0.7387, lr_0 = 9.6890e-04
Loss = 1.0368e-01, PNorm = 82.3520, GNorm = 0.5182, lr_0 = 9.6824e-04
Loss = 1.1111e-01, PNorm = 82.4556, GNorm = 0.7149, lr_0 = 9.6757e-04
Loss = 1.1604e-01, PNorm = 82.5579, GNorm = 0.5029, lr_0 = 9.6691e-04
Loss = 1.1584e-01, PNorm = 82.6712, GNorm = 0.7056, lr_0 = 9.6625e-04
Loss = 1.0581e-01, PNorm = 82.7878, GNorm = 0.5026, lr_0 = 9.6559e-04
Loss = 1.1516e-01, PNorm = 82.8933, GNorm = 0.6443, lr_0 = 9.6493e-04
Loss = 1.1432e-01, PNorm = 83.0014, GNorm = 0.7882, lr_0 = 9.6427e-04
Loss = 9.7801e-02, PNorm = 83.1139, GNorm = 0.8612, lr_0 = 9.6360e-04
Loss = 1.1436e-01, PNorm = 83.2266, GNorm = 0.6947, lr_0 = 9.6294e-04
Loss = 9.7259e-02, PNorm = 83.3327, GNorm = 0.4319, lr_0 = 9.6228e-04
Loss = 1.1491e-01, PNorm = 83.4386, GNorm = 0.9689, lr_0 = 9.6163e-04
Loss = 1.1528e-01, PNorm = 83.5463, GNorm = 0.5864, lr_0 = 9.6097e-04
Loss = 1.0606e-01, PNorm = 83.6632, GNorm = 0.7438, lr_0 = 9.6031e-04
Loss = 1.1443e-01, PNorm = 83.7550, GNorm = 1.1777, lr_0 = 9.5965e-04
Loss = 1.0071e-01, PNorm = 83.8609, GNorm = 0.4943, lr_0 = 9.5899e-04
Loss = 1.1860e-01, PNorm = 83.9649, GNorm = 0.5215, lr_0 = 9.5834e-04
Loss = 1.1139e-01, PNorm = 84.0635, GNorm = 0.6506, lr_0 = 9.5768e-04
Loss = 9.7196e-02, PNorm = 84.1619, GNorm = 0.4485, lr_0 = 9.5702e-04
Loss = 1.0165e-01, PNorm = 84.2565, GNorm = 0.9412, lr_0 = 9.5637e-04
Loss = 1.2089e-01, PNorm = 84.3545, GNorm = 0.9222, lr_0 = 9.5571e-04
Loss = 1.0094e-01, PNorm = 84.4630, GNorm = 0.4775, lr_0 = 9.5506e-04
Loss = 1.2402e-01, PNorm = 84.5666, GNorm = 1.2360, lr_0 = 9.5440e-04
Loss = 1.0177e-01, PNorm = 84.6715, GNorm = 0.4408, lr_0 = 9.5375e-04
Loss = 1.0170e-01, PNorm = 84.7732, GNorm = 0.3728, lr_0 = 9.5310e-04
Loss = 9.7499e-02, PNorm = 84.8758, GNorm = 1.0893, lr_0 = 9.5244e-04
Loss = 1.1188e-01, PNorm = 84.9724, GNorm = 0.4715, lr_0 = 9.5179e-04
Loss = 1.2109e-01, PNorm = 85.0860, GNorm = 0.5291, lr_0 = 9.5114e-04
Loss = 1.1263e-01, PNorm = 85.1920, GNorm = 0.7482, lr_0 = 9.5049e-04
Loss = 1.1722e-01, PNorm = 85.3031, GNorm = 0.6216, lr_0 = 9.4984e-04
Loss = 1.2545e-01, PNorm = 85.4213, GNorm = 0.5422, lr_0 = 9.4919e-04
Loss = 1.1355e-01, PNorm = 85.5404, GNorm = 0.9853, lr_0 = 9.4854e-04
Loss = 1.0900e-01, PNorm = 85.6397, GNorm = 0.4988, lr_0 = 9.4789e-04
Loss = 1.0024e-01, PNorm = 85.7495, GNorm = 0.5723, lr_0 = 9.4724e-04
Loss = 1.2013e-01, PNorm = 85.8597, GNorm = 0.6537, lr_0 = 9.4659e-04
Loss = 1.1021e-01, PNorm = 85.9700, GNorm = 0.3452, lr_0 = 9.4594e-04
Loss = 1.3316e-01, PNorm = 86.0903, GNorm = 0.8934, lr_0 = 9.4529e-04
Loss = 1.1704e-01, PNorm = 86.2083, GNorm = 0.4334, lr_0 = 9.4464e-04
Loss = 1.3419e-01, PNorm = 86.3332, GNorm = 0.7158, lr_0 = 9.4400e-04
Loss = 9.5680e-02, PNorm = 86.4439, GNorm = 0.4376, lr_0 = 9.4335e-04
Loss = 1.2252e-01, PNorm = 86.5499, GNorm = 1.1224, lr_0 = 9.4270e-04
Loss = 1.2774e-01, PNorm = 86.6674, GNorm = 0.6628, lr_0 = 9.4206e-04
Loss = 1.3541e-01, PNorm = 86.7791, GNorm = 0.7054, lr_0 = 9.4141e-04
Loss = 1.2288e-01, PNorm = 86.9028, GNorm = 0.6813, lr_0 = 9.4077e-04
Loss = 1.0317e-01, PNorm = 87.0112, GNorm = 0.5158, lr_0 = 9.4012e-04
Loss = 1.0660e-01, PNorm = 87.1189, GNorm = 1.0490, lr_0 = 9.3948e-04
Loss = 1.3751e-01, PNorm = 87.2274, GNorm = 0.6640, lr_0 = 9.3884e-04
Loss = 1.2513e-01, PNorm = 87.3557, GNorm = 0.9487, lr_0 = 9.3819e-04
Loss = 1.0987e-01, PNorm = 87.4684, GNorm = 0.5593, lr_0 = 9.3755e-04
Loss = 1.2553e-01, PNorm = 87.5808, GNorm = 0.9434, lr_0 = 9.3691e-04
Loss = 1.2530e-01, PNorm = 87.6856, GNorm = 0.9072, lr_0 = 9.3627e-04
Loss = 1.2282e-01, PNorm = 87.7995, GNorm = 1.2268, lr_0 = 9.3562e-04
Loss = 1.0814e-01, PNorm = 87.9065, GNorm = 0.5192, lr_0 = 9.3498e-04
Loss = 1.3052e-01, PNorm = 88.0080, GNorm = 0.8775, lr_0 = 9.3434e-04
Loss = 1.1134e-01, PNorm = 88.1161, GNorm = 1.0125, lr_0 = 9.3370e-04
Loss = 1.1117e-01, PNorm = 88.2275, GNorm = 1.0646, lr_0 = 9.3306e-04
Loss = 1.2713e-01, PNorm = 88.3314, GNorm = 0.6955, lr_0 = 9.3242e-04
Loss = 1.1278e-01, PNorm = 88.4324, GNorm = 0.7809, lr_0 = 9.3178e-04
Loss = 1.2056e-01, PNorm = 88.5384, GNorm = 0.6110, lr_0 = 9.3115e-04
Loss = 1.2120e-01, PNorm = 88.6443, GNorm = 0.6998, lr_0 = 9.3051e-04
Loss = 1.3583e-01, PNorm = 88.7700, GNorm = 0.4967, lr_0 = 9.2987e-04
Loss = 1.1954e-01, PNorm = 88.8832, GNorm = 0.8748, lr_0 = 9.2923e-04
Loss = 1.1799e-01, PNorm = 88.9901, GNorm = 0.9000, lr_0 = 9.2860e-04
Loss = 1.2771e-01, PNorm = 89.1126, GNorm = 0.6065, lr_0 = 9.2796e-04
Loss = 1.0796e-01, PNorm = 89.2196, GNorm = 0.6862, lr_0 = 9.2733e-04
Loss = 1.1685e-01, PNorm = 89.3263, GNorm = 0.7724, lr_0 = 9.2669e-04
Loss = 1.1574e-01, PNorm = 89.4313, GNorm = 1.0277, lr_0 = 9.2606e-04
Loss = 1.1293e-01, PNorm = 89.5268, GNorm = 0.7285, lr_0 = 9.2542e-04
Loss = 1.1515e-01, PNorm = 89.6301, GNorm = 0.6821, lr_0 = 9.2479e-04
Loss = 1.2583e-01, PNorm = 89.7312, GNorm = 0.7244, lr_0 = 9.2415e-04
Loss = 1.3239e-01, PNorm = 89.8326, GNorm = 0.8383, lr_0 = 9.2352e-04
Loss = 1.2389e-01, PNorm = 89.9303, GNorm = 0.4558, lr_0 = 9.2289e-04
Loss = 1.3177e-01, PNorm = 90.0306, GNorm = 0.5881, lr_0 = 9.2226e-04
Loss = 1.2505e-01, PNorm = 90.1314, GNorm = 0.4251, lr_0 = 9.2162e-04
Loss = 1.1874e-01, PNorm = 90.2354, GNorm = 0.9424, lr_0 = 9.2099e-04
Validation mae = 0.514148
Epoch 3
Loss = 6.7583e-02, PNorm = 90.3179, GNorm = 0.4790, lr_0 = 9.2036e-04
Loss = 7.3286e-02, PNorm = 90.3935, GNorm = 0.3658, lr_0 = 9.1973e-04
Loss = 6.3965e-02, PNorm = 90.4747, GNorm = 0.7339, lr_0 = 9.1910e-04
Loss = 6.5018e-02, PNorm = 90.5340, GNorm = 0.9817, lr_0 = 9.1847e-04
Loss = 7.1481e-02, PNorm = 90.6001, GNorm = 0.7643, lr_0 = 9.1784e-04
Loss = 7.1729e-02, PNorm = 90.6647, GNorm = 0.4780, lr_0 = 9.1721e-04
Loss = 7.1455e-02, PNorm = 90.7275, GNorm = 0.8168, lr_0 = 9.1658e-04
Loss = 6.7126e-02, PNorm = 90.7833, GNorm = 0.3862, lr_0 = 9.1596e-04
Loss = 6.8505e-02, PNorm = 90.8380, GNorm = 0.5564, lr_0 = 9.1533e-04
Loss = 7.0143e-02, PNorm = 90.8920, GNorm = 0.3416, lr_0 = 9.1470e-04
Loss = 7.0830e-02, PNorm = 90.9609, GNorm = 0.6223, lr_0 = 9.1408e-04
Loss = 6.5786e-02, PNorm = 91.0189, GNorm = 0.4271, lr_0 = 9.1345e-04
Loss = 5.6923e-02, PNorm = 91.0726, GNorm = 0.6379, lr_0 = 9.1282e-04
Loss = 6.9954e-02, PNorm = 91.1256, GNorm = 0.3866, lr_0 = 9.1220e-04
Loss = 6.7694e-02, PNorm = 91.1887, GNorm = 0.5287, lr_0 = 9.1157e-04
Loss = 6.2368e-02, PNorm = 91.2541, GNorm = 0.3921, lr_0 = 9.1095e-04
Loss = 6.0350e-02, PNorm = 91.3217, GNorm = 0.3806, lr_0 = 9.1032e-04
Loss = 6.9305e-02, PNorm = 91.3851, GNorm = 0.3150, lr_0 = 9.0970e-04
Loss = 6.1252e-02, PNorm = 91.4460, GNorm = 0.6946, lr_0 = 9.0908e-04
Loss = 7.1213e-02, PNorm = 91.5166, GNorm = 0.5547, lr_0 = 9.0846e-04
Loss = 5.9792e-02, PNorm = 91.5708, GNorm = 0.4170, lr_0 = 9.0783e-04
Loss = 5.4324e-02, PNorm = 91.6321, GNorm = 0.5280, lr_0 = 9.0721e-04
Loss = 5.5681e-02, PNorm = 91.6845, GNorm = 0.4810, lr_0 = 9.0659e-04
Loss = 6.6377e-02, PNorm = 91.7418, GNorm = 0.7030, lr_0 = 9.0597e-04
Loss = 6.6041e-02, PNorm = 91.8085, GNorm = 0.4088, lr_0 = 9.0535e-04
Loss = 5.9954e-02, PNorm = 91.8757, GNorm = 0.2945, lr_0 = 9.0473e-04
Loss = 6.3962e-02, PNorm = 91.9363, GNorm = 1.2148, lr_0 = 9.0411e-04
Loss = 6.4981e-02, PNorm = 91.9993, GNorm = 0.4678, lr_0 = 9.0349e-04
Loss = 7.0697e-02, PNorm = 92.0668, GNorm = 0.5679, lr_0 = 9.0287e-04
Loss = 6.1366e-02, PNorm = 92.1398, GNorm = 0.7643, lr_0 = 9.0225e-04
Loss = 7.7446e-02, PNorm = 92.2187, GNorm = 0.3374, lr_0 = 9.0163e-04
Loss = 6.1385e-02, PNorm = 92.2967, GNorm = 0.5720, lr_0 = 9.0102e-04
Loss = 6.3784e-02, PNorm = 92.3699, GNorm = 0.3143, lr_0 = 9.0040e-04
Loss = 7.2535e-02, PNorm = 92.4397, GNorm = 0.4495, lr_0 = 8.9978e-04
Loss = 6.5834e-02, PNorm = 92.5138, GNorm = 0.4736, lr_0 = 8.9916e-04
Loss = 8.0510e-02, PNorm = 92.5835, GNorm = 0.6078, lr_0 = 8.9855e-04
Loss = 6.6666e-02, PNorm = 92.6601, GNorm = 0.5024, lr_0 = 8.9793e-04
Loss = 7.6349e-02, PNorm = 92.7351, GNorm = 0.5047, lr_0 = 8.9732e-04
Loss = 6.4388e-02, PNorm = 92.8134, GNorm = 0.4325, lr_0 = 8.9670e-04
Loss = 6.9986e-02, PNorm = 92.8811, GNorm = 0.4132, lr_0 = 8.9609e-04
Loss = 6.1212e-02, PNorm = 92.9557, GNorm = 0.6357, lr_0 = 8.9548e-04
Loss = 6.1865e-02, PNorm = 93.0195, GNorm = 0.3839, lr_0 = 8.9486e-04
Loss = 6.7182e-02, PNorm = 93.0926, GNorm = 0.6681, lr_0 = 8.9425e-04
Loss = 6.9909e-02, PNorm = 93.1580, GNorm = 0.6542, lr_0 = 8.9364e-04
Loss = 6.6524e-02, PNorm = 93.2446, GNorm = 0.6007, lr_0 = 8.9302e-04
Loss = 6.8339e-02, PNorm = 93.3123, GNorm = 0.3206, lr_0 = 8.9241e-04
Loss = 6.9401e-02, PNorm = 93.3948, GNorm = 0.3834, lr_0 = 8.9180e-04
Loss = 6.1012e-02, PNorm = 93.4700, GNorm = 0.3639, lr_0 = 8.9119e-04
Loss = 6.8699e-02, PNorm = 93.5446, GNorm = 0.6292, lr_0 = 8.9058e-04
Loss = 7.2066e-02, PNorm = 93.6238, GNorm = 0.6615, lr_0 = 8.8997e-04
Loss = 6.6839e-02, PNorm = 93.6943, GNorm = 0.6709, lr_0 = 8.8936e-04
Loss = 7.1124e-02, PNorm = 93.7754, GNorm = 0.4854, lr_0 = 8.8875e-04
Loss = 7.2653e-02, PNorm = 93.8589, GNorm = 0.7264, lr_0 = 8.8814e-04
Loss = 6.4676e-02, PNorm = 93.9389, GNorm = 0.3509, lr_0 = 8.8753e-04
Loss = 7.6163e-02, PNorm = 94.0115, GNorm = 0.8428, lr_0 = 8.8693e-04
Loss = 7.2556e-02, PNorm = 94.1007, GNorm = 0.4265, lr_0 = 8.8632e-04
Loss = 7.3424e-02, PNorm = 94.1788, GNorm = 0.4038, lr_0 = 8.8571e-04
Loss = 7.5884e-02, PNorm = 94.2457, GNorm = 0.5829, lr_0 = 8.8510e-04
Loss = 6.4578e-02, PNorm = 94.3353, GNorm = 0.5659, lr_0 = 8.8450e-04
Loss = 8.1813e-02, PNorm = 94.4179, GNorm = 0.6472, lr_0 = 8.8389e-04
Loss = 6.8838e-02, PNorm = 94.5004, GNorm = 0.3637, lr_0 = 8.8329e-04
Loss = 7.0883e-02, PNorm = 94.5878, GNorm = 1.1098, lr_0 = 8.8268e-04
Loss = 8.0680e-02, PNorm = 94.6741, GNorm = 0.3368, lr_0 = 8.8208e-04
Loss = 7.3717e-02, PNorm = 94.7511, GNorm = 0.7434, lr_0 = 8.8147e-04
Loss = 7.3823e-02, PNorm = 94.8461, GNorm = 1.0267, lr_0 = 8.8087e-04
Loss = 6.7325e-02, PNorm = 94.9238, GNorm = 0.6333, lr_0 = 8.8026e-04
Loss = 9.2195e-02, PNorm = 95.0103, GNorm = 0.4725, lr_0 = 8.7966e-04
Loss = 7.4061e-02, PNorm = 95.0897, GNorm = 0.6079, lr_0 = 8.7906e-04
Loss = 6.8665e-02, PNorm = 95.1867, GNorm = 0.5268, lr_0 = 8.7846e-04
Loss = 6.8683e-02, PNorm = 95.2664, GNorm = 0.5354, lr_0 = 8.7785e-04
Loss = 8.2375e-02, PNorm = 95.3537, GNorm = 1.0444, lr_0 = 8.7725e-04
Loss = 7.0705e-02, PNorm = 95.4366, GNorm = 0.8240, lr_0 = 8.7665e-04
Loss = 6.6518e-02, PNorm = 95.5212, GNorm = 0.6216, lr_0 = 8.7605e-04
Loss = 8.0978e-02, PNorm = 95.6072, GNorm = 0.6640, lr_0 = 8.7545e-04
Loss = 7.2725e-02, PNorm = 95.7037, GNorm = 0.5551, lr_0 = 8.7485e-04
Loss = 6.8465e-02, PNorm = 95.7740, GNorm = 0.4829, lr_0 = 8.7425e-04
Loss = 8.0295e-02, PNorm = 95.8607, GNorm = 0.9560, lr_0 = 8.7365e-04
Loss = 7.7343e-02, PNorm = 95.9559, GNorm = 0.5920, lr_0 = 8.7306e-04
Loss = 6.9790e-02, PNorm = 96.0443, GNorm = 0.4491, lr_0 = 8.7246e-04
Loss = 7.1495e-02, PNorm = 96.1277, GNorm = 0.4734, lr_0 = 8.7186e-04
Loss = 6.7089e-02, PNorm = 96.2034, GNorm = 0.3443, lr_0 = 8.7126e-04
Loss = 7.6977e-02, PNorm = 96.2876, GNorm = 0.5135, lr_0 = 8.7067e-04
Loss = 7.5255e-02, PNorm = 96.3812, GNorm = 0.9057, lr_0 = 8.7007e-04
Loss = 7.2978e-02, PNorm = 96.4628, GNorm = 0.5539, lr_0 = 8.6947e-04
Loss = 7.4783e-02, PNorm = 96.5435, GNorm = 0.8531, lr_0 = 8.6888e-04
Loss = 8.1987e-02, PNorm = 96.6260, GNorm = 0.5751, lr_0 = 8.6828e-04
Loss = 8.0459e-02, PNorm = 96.7200, GNorm = 0.7813, lr_0 = 8.6769e-04
Loss = 6.8436e-02, PNorm = 96.8028, GNorm = 0.5264, lr_0 = 8.6709e-04
Loss = 7.6016e-02, PNorm = 96.8969, GNorm = 0.6041, lr_0 = 8.6650e-04
Loss = 8.1897e-02, PNorm = 96.9758, GNorm = 0.5640, lr_0 = 8.6590e-04
Loss = 7.7542e-02, PNorm = 97.0678, GNorm = 0.3170, lr_0 = 8.6531e-04
Loss = 7.6584e-02, PNorm = 97.1558, GNorm = 0.5802, lr_0 = 8.6472e-04
Loss = 7.6448e-02, PNorm = 97.2427, GNorm = 0.3195, lr_0 = 8.6413e-04
Loss = 6.9355e-02, PNorm = 97.3242, GNorm = 0.5699, lr_0 = 8.6353e-04
Loss = 7.7666e-02, PNorm = 97.4186, GNorm = 0.3651, lr_0 = 8.6294e-04
Loss = 7.1321e-02, PNorm = 97.5062, GNorm = 0.3172, lr_0 = 8.6235e-04
Loss = 8.2409e-02, PNorm = 97.5964, GNorm = 0.6939, lr_0 = 8.6176e-04
Loss = 8.3193e-02, PNorm = 97.6817, GNorm = 0.7989, lr_0 = 8.6117e-04
Loss = 7.0176e-02, PNorm = 97.7637, GNorm = 0.5502, lr_0 = 8.6058e-04
Loss = 8.6181e-02, PNorm = 97.8598, GNorm = 0.4620, lr_0 = 8.5999e-04
Loss = 8.1484e-02, PNorm = 97.9501, GNorm = 0.7313, lr_0 = 8.5940e-04
Loss = 8.1060e-02, PNorm = 98.0429, GNorm = 0.5758, lr_0 = 8.5881e-04
Loss = 6.7998e-02, PNorm = 98.1243, GNorm = 0.5439, lr_0 = 8.5823e-04
Loss = 7.9204e-02, PNorm = 98.2141, GNorm = 0.8910, lr_0 = 8.5764e-04
Loss = 7.0103e-02, PNorm = 98.3019, GNorm = 0.9509, lr_0 = 8.5705e-04
Loss = 7.9689e-02, PNorm = 98.3830, GNorm = 0.4638, lr_0 = 8.5646e-04
Loss = 7.8081e-02, PNorm = 98.4782, GNorm = 0.3343, lr_0 = 8.5588e-04
Loss = 7.9937e-02, PNorm = 98.5727, GNorm = 0.4401, lr_0 = 8.5529e-04
Loss = 8.0560e-02, PNorm = 98.6571, GNorm = 0.3989, lr_0 = 8.5470e-04
Loss = 7.4720e-02, PNorm = 98.7563, GNorm = 0.5069, lr_0 = 8.5412e-04
Loss = 7.9362e-02, PNorm = 98.8529, GNorm = 0.3368, lr_0 = 8.5353e-04
Loss = 7.8733e-02, PNorm = 98.9516, GNorm = 0.7670, lr_0 = 8.5295e-04
Loss = 7.9927e-02, PNorm = 99.0416, GNorm = 0.7398, lr_0 = 8.5236e-04
Loss = 7.3190e-02, PNorm = 99.1435, GNorm = 1.3113, lr_0 = 8.5178e-04
Loss = 7.4226e-02, PNorm = 99.2463, GNorm = 0.4175, lr_0 = 8.5120e-04
Loss = 8.1405e-02, PNorm = 99.3401, GNorm = 0.7441, lr_0 = 8.5061e-04
Loss = 7.8122e-02, PNorm = 99.4348, GNorm = 0.9649, lr_0 = 8.5003e-04
Loss = 7.0438e-02, PNorm = 99.5282, GNorm = 0.5770, lr_0 = 8.4945e-04
Loss = 7.2420e-02, PNorm = 99.6147, GNorm = 0.6746, lr_0 = 8.4887e-04
Loss = 8.1335e-02, PNorm = 99.7039, GNorm = 0.4687, lr_0 = 8.4828e-04
Validation mae = 0.500480
Epoch 4
Loss = 4.5889e-02, PNorm = 99.7884, GNorm = 0.5042, lr_0 = 8.4770e-04
Loss = 5.5489e-02, PNorm = 99.8554, GNorm = 0.4334, lr_0 = 8.4712e-04
Loss = 6.2309e-02, PNorm = 99.9243, GNorm = 0.6142, lr_0 = 8.4654e-04
Loss = 4.5822e-02, PNorm = 99.9850, GNorm = 0.3568, lr_0 = 8.4596e-04
Loss = 5.2051e-02, PNorm = 100.0424, GNorm = 0.7040, lr_0 = 8.4538e-04
Loss = 4.4501e-02, PNorm = 100.1010, GNorm = 0.7585, lr_0 = 8.4480e-04
Loss = 4.5597e-02, PNorm = 100.1509, GNorm = 0.4252, lr_0 = 8.4423e-04
Loss = 4.4495e-02, PNorm = 100.2052, GNorm = 0.2599, lr_0 = 8.4365e-04
Loss = 4.2522e-02, PNorm = 100.2576, GNorm = 0.2808, lr_0 = 8.4307e-04
Loss = 4.7507e-02, PNorm = 100.3058, GNorm = 0.4315, lr_0 = 8.4249e-04
Loss = 4.1861e-02, PNorm = 100.3568, GNorm = 0.5558, lr_0 = 8.4191e-04
Loss = 4.2623e-02, PNorm = 100.4056, GNorm = 0.3557, lr_0 = 8.4134e-04
Loss = 3.9784e-02, PNorm = 100.4594, GNorm = 0.2917, lr_0 = 8.4076e-04
Loss = 4.5883e-02, PNorm = 100.5093, GNorm = 0.7425, lr_0 = 8.4019e-04
Loss = 3.9228e-02, PNorm = 100.5642, GNorm = 0.5469, lr_0 = 8.3961e-04
Loss = 5.0010e-02, PNorm = 100.6166, GNorm = 0.5628, lr_0 = 8.3903e-04
Loss = 4.1940e-02, PNorm = 100.6710, GNorm = 0.4826, lr_0 = 8.3846e-04
Loss = 4.6007e-02, PNorm = 100.7332, GNorm = 0.2610, lr_0 = 8.3789e-04
Loss = 5.0151e-02, PNorm = 100.7929, GNorm = 0.5959, lr_0 = 8.3731e-04
Loss = 4.2983e-02, PNorm = 100.8627, GNorm = 0.3171, lr_0 = 8.3674e-04
Loss = 4.2794e-02, PNorm = 100.9172, GNorm = 0.6552, lr_0 = 8.3616e-04
Loss = 4.7190e-02, PNorm = 100.9674, GNorm = 0.3436, lr_0 = 8.3559e-04
Loss = 3.7082e-02, PNorm = 101.0162, GNorm = 0.3559, lr_0 = 8.3502e-04
Loss = 4.7572e-02, PNorm = 101.0569, GNorm = 0.4868, lr_0 = 8.3445e-04
Loss = 4.4357e-02, PNorm = 101.1067, GNorm = 0.2906, lr_0 = 8.3388e-04
Loss = 4.6908e-02, PNorm = 101.1574, GNorm = 0.2961, lr_0 = 8.3330e-04
Loss = 4.4130e-02, PNorm = 101.2126, GNorm = 0.3670, lr_0 = 8.3273e-04
Loss = 4.0606e-02, PNorm = 101.2697, GNorm = 0.3914, lr_0 = 8.3216e-04
Loss = 4.2137e-02, PNorm = 101.3226, GNorm = 0.3143, lr_0 = 8.3159e-04
Loss = 4.3810e-02, PNorm = 101.3744, GNorm = 0.2236, lr_0 = 8.3102e-04
Loss = 4.4867e-02, PNorm = 101.4283, GNorm = 0.5066, lr_0 = 8.3045e-04
Loss = 4.4630e-02, PNorm = 101.4875, GNorm = 1.1362, lr_0 = 8.2988e-04
Loss = 4.5602e-02, PNorm = 101.5502, GNorm = 0.3760, lr_0 = 8.2932e-04
Loss = 4.7039e-02, PNorm = 101.6133, GNorm = 1.1586, lr_0 = 8.2875e-04
Loss = 4.2954e-02, PNorm = 101.6739, GNorm = 0.8019, lr_0 = 8.2818e-04
Loss = 4.4452e-02, PNorm = 101.7251, GNorm = 0.2762, lr_0 = 8.2761e-04
Loss = 4.4402e-02, PNorm = 101.7776, GNorm = 0.6216, lr_0 = 8.2705e-04
Loss = 5.1960e-02, PNorm = 101.8372, GNorm = 0.2998, lr_0 = 8.2648e-04
Loss = 4.5628e-02, PNorm = 101.9022, GNorm = 0.4999, lr_0 = 8.2591e-04
Loss = 4.6337e-02, PNorm = 101.9606, GNorm = 0.5561, lr_0 = 8.2535e-04
Loss = 5.2624e-02, PNorm = 102.0257, GNorm = 0.4255, lr_0 = 8.2478e-04
Loss = 4.0899e-02, PNorm = 102.1002, GNorm = 0.7713, lr_0 = 8.2422e-04
Loss = 5.0472e-02, PNorm = 102.1689, GNorm = 0.6280, lr_0 = 8.2365e-04
Loss = 4.4227e-02, PNorm = 102.2309, GNorm = 0.3621, lr_0 = 8.2309e-04
Loss = 4.1817e-02, PNorm = 102.3025, GNorm = 0.4151, lr_0 = 8.2252e-04
Loss = 4.3456e-02, PNorm = 102.3628, GNorm = 0.4588, lr_0 = 8.2196e-04
Loss = 4.9110e-02, PNorm = 102.4213, GNorm = 0.3521, lr_0 = 8.2140e-04
Loss = 4.9349e-02, PNorm = 102.4810, GNorm = 0.3909, lr_0 = 8.2084e-04
Loss = 4.7299e-02, PNorm = 102.5396, GNorm = 0.4165, lr_0 = 8.2027e-04
Loss = 4.3981e-02, PNorm = 102.6033, GNorm = 0.3129, lr_0 = 8.1971e-04
Loss = 5.2405e-02, PNorm = 102.6621, GNorm = 0.3494, lr_0 = 8.1915e-04
Loss = 4.9543e-02, PNorm = 102.7243, GNorm = 0.3684, lr_0 = 8.1859e-04
Loss = 4.8844e-02, PNorm = 102.7831, GNorm = 0.4980, lr_0 = 8.1803e-04
Loss = 4.3146e-02, PNorm = 102.8473, GNorm = 0.3652, lr_0 = 8.1747e-04
Loss = 4.2238e-02, PNorm = 102.9186, GNorm = 0.2551, lr_0 = 8.1691e-04
Loss = 4.9858e-02, PNorm = 102.9912, GNorm = 0.6374, lr_0 = 8.1635e-04
Loss = 5.1229e-02, PNorm = 103.0507, GNorm = 0.6107, lr_0 = 8.1579e-04
Loss = 5.2848e-02, PNorm = 103.1173, GNorm = 0.6107, lr_0 = 8.1523e-04
Loss = 4.3254e-02, PNorm = 103.1842, GNorm = 0.3018, lr_0 = 8.1467e-04
Loss = 4.8600e-02, PNorm = 103.2448, GNorm = 0.4924, lr_0 = 8.1411e-04
Loss = 5.3215e-02, PNorm = 103.3122, GNorm = 0.6556, lr_0 = 8.1355e-04
Loss = 5.2087e-02, PNorm = 103.3928, GNorm = 0.3888, lr_0 = 8.1300e-04
Loss = 5.7422e-02, PNorm = 103.4636, GNorm = 0.8503, lr_0 = 8.1244e-04
Loss = 4.5369e-02, PNorm = 103.5354, GNorm = 0.4483, lr_0 = 8.1188e-04
Loss = 5.3264e-02, PNorm = 103.5976, GNorm = 0.6778, lr_0 = 8.1133e-04
Loss = 5.5239e-02, PNorm = 103.6718, GNorm = 0.7785, lr_0 = 8.1077e-04
Loss = 4.8204e-02, PNorm = 103.7447, GNorm = 0.6720, lr_0 = 8.1022e-04
Loss = 5.3917e-02, PNorm = 103.8114, GNorm = 0.5131, lr_0 = 8.0966e-04
Loss = 4.8510e-02, PNorm = 103.8793, GNorm = 0.2883, lr_0 = 8.0911e-04
Loss = 4.7438e-02, PNorm = 103.9496, GNorm = 0.5278, lr_0 = 8.0855e-04
Loss = 6.0044e-02, PNorm = 104.0202, GNorm = 0.8352, lr_0 = 8.0800e-04
Loss = 4.8087e-02, PNorm = 104.1017, GNorm = 0.4345, lr_0 = 8.0745e-04
Loss = 6.1375e-02, PNorm = 104.1725, GNorm = 0.5171, lr_0 = 8.0689e-04
Loss = 5.9602e-02, PNorm = 104.2475, GNorm = 0.5341, lr_0 = 8.0634e-04
Loss = 5.8018e-02, PNorm = 104.3396, GNorm = 0.6073, lr_0 = 8.0579e-04
Loss = 6.9021e-02, PNorm = 104.4213, GNorm = 0.8030, lr_0 = 8.0523e-04
Loss = 5.2107e-02, PNorm = 104.5043, GNorm = 0.6542, lr_0 = 8.0468e-04
Loss = 4.7840e-02, PNorm = 104.5854, GNorm = 0.4675, lr_0 = 8.0413e-04
Loss = 4.6609e-02, PNorm = 104.6541, GNorm = 0.2586, lr_0 = 8.0358e-04
Loss = 6.0534e-02, PNorm = 104.7247, GNorm = 0.3763, lr_0 = 8.0303e-04
Loss = 4.7962e-02, PNorm = 104.8004, GNorm = 0.6837, lr_0 = 8.0248e-04
Loss = 5.0507e-02, PNorm = 104.8804, GNorm = 0.7406, lr_0 = 8.0193e-04
Loss = 4.8714e-02, PNorm = 104.9554, GNorm = 0.6715, lr_0 = 8.0138e-04
Loss = 4.7759e-02, PNorm = 105.0261, GNorm = 0.3317, lr_0 = 8.0083e-04
Loss = 4.8302e-02, PNorm = 105.1029, GNorm = 0.5555, lr_0 = 8.0028e-04
Loss = 4.6994e-02, PNorm = 105.1803, GNorm = 0.6378, lr_0 = 7.9974e-04
Loss = 5.7505e-02, PNorm = 105.2489, GNorm = 0.3444, lr_0 = 7.9919e-04
Loss = 5.5275e-02, PNorm = 105.3318, GNorm = 0.7246, lr_0 = 7.9864e-04
Loss = 4.8537e-02, PNorm = 105.4116, GNorm = 0.2657, lr_0 = 7.9809e-04
Loss = 5.3251e-02, PNorm = 105.5004, GNorm = 0.4978, lr_0 = 7.9755e-04
Loss = 5.1519e-02, PNorm = 105.5822, GNorm = 0.5160, lr_0 = 7.9700e-04
Loss = 5.3251e-02, PNorm = 105.6643, GNorm = 0.3720, lr_0 = 7.9645e-04
Loss = 4.6834e-02, PNorm = 105.7300, GNorm = 0.3474, lr_0 = 7.9591e-04
Loss = 5.5542e-02, PNorm = 105.8010, GNorm = 0.4394, lr_0 = 7.9536e-04
Loss = 5.0297e-02, PNorm = 105.8731, GNorm = 0.3658, lr_0 = 7.9482e-04
Loss = 5.0863e-02, PNorm = 105.9475, GNorm = 0.4006, lr_0 = 7.9427e-04
Loss = 5.6899e-02, PNorm = 106.0169, GNorm = 0.4212, lr_0 = 7.9373e-04
Loss = 5.2003e-02, PNorm = 106.0891, GNorm = 0.7464, lr_0 = 7.9319e-04
Loss = 4.5668e-02, PNorm = 106.1569, GNorm = 0.8787, lr_0 = 7.9264e-04
Loss = 5.8189e-02, PNorm = 106.2254, GNorm = 0.5676, lr_0 = 7.9210e-04
Loss = 4.5407e-02, PNorm = 106.2955, GNorm = 0.3151, lr_0 = 7.9156e-04
Loss = 5.1196e-02, PNorm = 106.3674, GNorm = 0.3311, lr_0 = 7.9101e-04
Loss = 5.0814e-02, PNorm = 106.4377, GNorm = 0.3333, lr_0 = 7.9047e-04
Loss = 4.8524e-02, PNorm = 106.5119, GNorm = 0.3398, lr_0 = 7.8993e-04
Loss = 5.5226e-02, PNorm = 106.5814, GNorm = 0.6323, lr_0 = 7.8939e-04
Loss = 4.8332e-02, PNorm = 106.6595, GNorm = 0.2981, lr_0 = 7.8885e-04
Loss = 5.4908e-02, PNorm = 106.7387, GNorm = 0.5944, lr_0 = 7.8831e-04
Loss = 5.9272e-02, PNorm = 106.8147, GNorm = 0.8609, lr_0 = 7.8777e-04
Loss = 5.7929e-02, PNorm = 106.9042, GNorm = 0.7034, lr_0 = 7.8723e-04
Loss = 5.3386e-02, PNorm = 106.9814, GNorm = 0.2938, lr_0 = 7.8669e-04
Loss = 5.2354e-02, PNorm = 107.0645, GNorm = 0.3528, lr_0 = 7.8615e-04
Loss = 5.3639e-02, PNorm = 107.1504, GNorm = 0.4868, lr_0 = 7.8561e-04
Loss = 4.8637e-02, PNorm = 107.2297, GNorm = 0.4744, lr_0 = 7.8507e-04
Loss = 5.7738e-02, PNorm = 107.3164, GNorm = 0.2859, lr_0 = 7.8454e-04
Loss = 4.4714e-02, PNorm = 107.3874, GNorm = 0.3356, lr_0 = 7.8400e-04
Loss = 5.8723e-02, PNorm = 107.4681, GNorm = 0.3572, lr_0 = 7.8346e-04
Loss = 4.9033e-02, PNorm = 107.5464, GNorm = 0.5520, lr_0 = 7.8293e-04
Loss = 5.6075e-02, PNorm = 107.6154, GNorm = 0.5792, lr_0 = 7.8239e-04
Loss = 5.7678e-02, PNorm = 107.6937, GNorm = 0.5091, lr_0 = 7.8185e-04
Loss = 5.2962e-02, PNorm = 107.7744, GNorm = 0.7185, lr_0 = 7.8132e-04
Validation mae = 0.495587
Epoch 5
Loss = 4.0558e-02, PNorm = 107.8377, GNorm = 0.3510, lr_0 = 7.8078e-04
Loss = 3.7169e-02, PNorm = 107.8937, GNorm = 0.6108, lr_0 = 7.8025e-04
Loss = 3.3831e-02, PNorm = 107.9408, GNorm = 0.6718, lr_0 = 7.7971e-04
Loss = 4.2591e-02, PNorm = 107.9882, GNorm = 0.3913, lr_0 = 7.7918e-04
Loss = 3.4558e-02, PNorm = 108.0381, GNorm = 0.3894, lr_0 = 7.7864e-04
Loss = 4.0770e-02, PNorm = 108.0931, GNorm = 0.2493, lr_0 = 7.7811e-04
Loss = 3.6475e-02, PNorm = 108.1486, GNorm = 1.0924, lr_0 = 7.7758e-04
Loss = 3.5160e-02, PNorm = 108.1997, GNorm = 0.5964, lr_0 = 7.7705e-04
Loss = 3.9854e-02, PNorm = 108.2469, GNorm = 0.6406, lr_0 = 7.7651e-04
Loss = 4.0466e-02, PNorm = 108.3081, GNorm = 0.4490, lr_0 = 7.7598e-04
Loss = 3.5042e-02, PNorm = 108.3585, GNorm = 0.5700, lr_0 = 7.7545e-04
Loss = 3.1773e-02, PNorm = 108.3989, GNorm = 0.4367, lr_0 = 7.7492e-04
Loss = 3.1827e-02, PNorm = 108.4474, GNorm = 0.2302, lr_0 = 7.7439e-04
Loss = 3.3854e-02, PNorm = 108.4988, GNorm = 0.6277, lr_0 = 7.7386e-04
Loss = 3.4442e-02, PNorm = 108.5488, GNorm = 0.2201, lr_0 = 7.7333e-04
Loss = 3.9079e-02, PNorm = 108.5943, GNorm = 0.3261, lr_0 = 7.7280e-04
Loss = 3.6452e-02, PNorm = 108.6508, GNorm = 0.2358, lr_0 = 7.7227e-04
Loss = 3.5261e-02, PNorm = 108.7028, GNorm = 0.4250, lr_0 = 7.7174e-04
Loss = 3.4240e-02, PNorm = 108.7547, GNorm = 0.4639, lr_0 = 7.7121e-04
Loss = 3.2725e-02, PNorm = 108.8020, GNorm = 0.2369, lr_0 = 7.7068e-04
Loss = 3.5838e-02, PNorm = 108.8547, GNorm = 0.2992, lr_0 = 7.7015e-04
Loss = 3.7573e-02, PNorm = 108.9071, GNorm = 0.3749, lr_0 = 7.6963e-04
Loss = 3.4349e-02, PNorm = 108.9573, GNorm = 0.7271, lr_0 = 7.6910e-04
Loss = 3.1945e-02, PNorm = 109.0116, GNorm = 0.4456, lr_0 = 7.6857e-04
Loss = 4.2912e-02, PNorm = 109.0531, GNorm = 0.6415, lr_0 = 7.6805e-04
Loss = 4.6767e-02, PNorm = 109.1180, GNorm = 0.4710, lr_0 = 7.6752e-04
Loss = 3.5391e-02, PNorm = 109.1772, GNorm = 0.3644, lr_0 = 7.6699e-04
Loss = 3.6686e-02, PNorm = 109.2352, GNorm = 0.4257, lr_0 = 7.6647e-04
Loss = 3.4487e-02, PNorm = 109.2886, GNorm = 0.8984, lr_0 = 7.6594e-04
Loss = 3.8913e-02, PNorm = 109.3433, GNorm = 0.4687, lr_0 = 7.6542e-04
Loss = 3.3333e-02, PNorm = 109.4002, GNorm = 0.7027, lr_0 = 7.6489e-04
Loss = 3.3644e-02, PNorm = 109.4534, GNorm = 0.2503, lr_0 = 7.6437e-04
Loss = 3.3816e-02, PNorm = 109.4975, GNorm = 0.4555, lr_0 = 7.6385e-04
Loss = 3.2959e-02, PNorm = 109.5491, GNorm = 0.3224, lr_0 = 7.6332e-04
Loss = 3.3331e-02, PNorm = 109.5925, GNorm = 0.2320, lr_0 = 7.6280e-04
Loss = 3.7736e-02, PNorm = 109.6408, GNorm = 0.5373, lr_0 = 7.6228e-04
Loss = 3.3243e-02, PNorm = 109.7009, GNorm = 0.3613, lr_0 = 7.6176e-04
Loss = 3.5148e-02, PNorm = 109.7603, GNorm = 0.4966, lr_0 = 7.6123e-04
Loss = 3.6230e-02, PNorm = 109.8200, GNorm = 0.2067, lr_0 = 7.6071e-04
Loss = 3.4505e-02, PNorm = 109.8710, GNorm = 0.2909, lr_0 = 7.6019e-04
Loss = 3.2576e-02, PNorm = 109.9246, GNorm = 0.6171, lr_0 = 7.5967e-04
Loss = 3.6031e-02, PNorm = 109.9724, GNorm = 0.4394, lr_0 = 7.5915e-04
Loss = 3.4167e-02, PNorm = 110.0252, GNorm = 0.3207, lr_0 = 7.5863e-04
Loss = 4.0631e-02, PNorm = 110.0771, GNorm = 0.5108, lr_0 = 7.5811e-04
Loss = 4.2990e-02, PNorm = 110.1350, GNorm = 0.8371, lr_0 = 7.5759e-04
Loss = 3.7961e-02, PNorm = 110.1960, GNorm = 0.2590, lr_0 = 7.5707e-04
Loss = 3.0398e-02, PNorm = 110.2510, GNorm = 0.3299, lr_0 = 7.5655e-04
Loss = 3.0443e-02, PNorm = 110.3001, GNorm = 0.2616, lr_0 = 7.5603e-04
Loss = 3.6502e-02, PNorm = 110.3447, GNorm = 0.2972, lr_0 = 7.5552e-04
Loss = 3.3523e-02, PNorm = 110.3955, GNorm = 0.3657, lr_0 = 7.5500e-04
Loss = 4.8551e-02, PNorm = 110.4446, GNorm = 0.3863, lr_0 = 7.5448e-04
Loss = 3.8303e-02, PNorm = 110.5019, GNorm = 0.6217, lr_0 = 7.5397e-04
Loss = 3.6227e-02, PNorm = 110.5544, GNorm = 0.3031, lr_0 = 7.5345e-04
Loss = 3.7173e-02, PNorm = 110.6139, GNorm = 0.7329, lr_0 = 7.5293e-04
Loss = 4.2427e-02, PNorm = 110.6625, GNorm = 0.2729, lr_0 = 7.5242e-04
Loss = 3.8654e-02, PNorm = 110.7198, GNorm = 0.5168, lr_0 = 7.5190e-04
Loss = 3.7112e-02, PNorm = 110.7788, GNorm = 0.4960, lr_0 = 7.5139e-04
Loss = 3.5916e-02, PNorm = 110.8388, GNorm = 0.9145, lr_0 = 7.5087e-04
Loss = 4.1408e-02, PNorm = 110.9060, GNorm = 0.8118, lr_0 = 7.5036e-04
Loss = 3.9932e-02, PNorm = 110.9776, GNorm = 0.5359, lr_0 = 7.4984e-04
Loss = 3.7658e-02, PNorm = 111.0498, GNorm = 0.2645, lr_0 = 7.4933e-04
Loss = 3.6300e-02, PNorm = 111.1159, GNorm = 0.4594, lr_0 = 7.4882e-04
Loss = 3.5011e-02, PNorm = 111.1727, GNorm = 0.2491, lr_0 = 7.4830e-04
Loss = 3.7626e-02, PNorm = 111.2303, GNorm = 0.4009, lr_0 = 7.4779e-04
Loss = 3.7494e-02, PNorm = 111.2873, GNorm = 0.3511, lr_0 = 7.4728e-04
Loss = 3.5287e-02, PNorm = 111.3453, GNorm = 0.2598, lr_0 = 7.4677e-04
Loss = 4.7113e-02, PNorm = 111.4053, GNorm = 0.7001, lr_0 = 7.4625e-04
Loss = 3.8304e-02, PNorm = 111.4581, GNorm = 0.4399, lr_0 = 7.4574e-04
Loss = 2.8157e-02, PNorm = 111.5144, GNorm = 0.4210, lr_0 = 7.4523e-04
Loss = 3.5886e-02, PNorm = 111.5700, GNorm = 0.7208, lr_0 = 7.4472e-04
Loss = 3.8253e-02, PNorm = 111.6313, GNorm = 0.2141, lr_0 = 7.4421e-04
Loss = 3.9125e-02, PNorm = 111.6942, GNorm = 0.8394, lr_0 = 7.4370e-04
Loss = 3.3914e-02, PNorm = 111.7626, GNorm = 0.2341, lr_0 = 7.4319e-04
Loss = 4.0200e-02, PNorm = 111.8270, GNorm = 0.4751, lr_0 = 7.4268e-04
Loss = 3.7206e-02, PNorm = 111.8940, GNorm = 0.3827, lr_0 = 7.4217e-04
Loss = 4.0443e-02, PNorm = 111.9595, GNorm = 0.2569, lr_0 = 7.4167e-04
Loss = 3.5117e-02, PNorm = 112.0231, GNorm = 0.4351, lr_0 = 7.4116e-04
Loss = 3.3449e-02, PNorm = 112.0916, GNorm = 0.2241, lr_0 = 7.4065e-04
Loss = 3.1722e-02, PNorm = 112.1511, GNorm = 0.7521, lr_0 = 7.4014e-04
Loss = 4.0706e-02, PNorm = 112.2066, GNorm = 0.4580, lr_0 = 7.3964e-04
Loss = 3.2351e-02, PNorm = 112.2753, GNorm = 0.5258, lr_0 = 7.3913e-04
Loss = 3.9029e-02, PNorm = 112.3417, GNorm = 0.1933, lr_0 = 7.3862e-04
Loss = 4.5346e-02, PNorm = 112.4063, GNorm = 0.4020, lr_0 = 7.3812e-04
Loss = 4.1589e-02, PNorm = 112.4653, GNorm = 0.3210, lr_0 = 7.3761e-04
Loss = 3.9032e-02, PNorm = 112.5241, GNorm = 0.7631, lr_0 = 7.3711e-04
Loss = 3.8642e-02, PNorm = 112.5808, GNorm = 0.3514, lr_0 = 7.3660e-04
Loss = 3.2626e-02, PNorm = 112.6368, GNorm = 0.6266, lr_0 = 7.3610e-04
Loss = 4.6256e-02, PNorm = 112.7034, GNorm = 0.8997, lr_0 = 7.3559e-04
Loss = 3.8546e-02, PNorm = 112.7722, GNorm = 0.6977, lr_0 = 7.3509e-04
Loss = 3.8010e-02, PNorm = 112.8402, GNorm = 0.6496, lr_0 = 7.3458e-04
Loss = 4.2040e-02, PNorm = 112.9013, GNorm = 0.3658, lr_0 = 7.3408e-04
Loss = 3.9518e-02, PNorm = 112.9674, GNorm = 0.2734, lr_0 = 7.3358e-04
Loss = 4.1227e-02, PNorm = 113.0367, GNorm = 0.2194, lr_0 = 7.3308e-04
Loss = 3.8932e-02, PNorm = 113.0982, GNorm = 0.4678, lr_0 = 7.3257e-04
Loss = 3.6786e-02, PNorm = 113.1648, GNorm = 0.2676, lr_0 = 7.3207e-04
Loss = 4.2488e-02, PNorm = 113.2284, GNorm = 0.4511, lr_0 = 7.3157e-04
Loss = 3.5249e-02, PNorm = 113.2973, GNorm = 0.4296, lr_0 = 7.3107e-04
Loss = 3.8539e-02, PNorm = 113.3567, GNorm = 0.4022, lr_0 = 7.3057e-04
Loss = 3.4238e-02, PNorm = 113.4274, GNorm = 0.4083, lr_0 = 7.3007e-04
Loss = 3.8233e-02, PNorm = 113.4925, GNorm = 0.3117, lr_0 = 7.2957e-04
Loss = 3.1940e-02, PNorm = 113.5563, GNorm = 0.3519, lr_0 = 7.2907e-04
Loss = 4.1081e-02, PNorm = 113.6173, GNorm = 0.1742, lr_0 = 7.2857e-04
Loss = 4.3117e-02, PNorm = 113.6828, GNorm = 0.4522, lr_0 = 7.2807e-04
Loss = 3.7927e-02, PNorm = 113.7543, GNorm = 0.4109, lr_0 = 7.2757e-04
Loss = 4.0175e-02, PNorm = 113.8182, GNorm = 0.3555, lr_0 = 7.2707e-04
Loss = 3.9101e-02, PNorm = 113.8856, GNorm = 0.3707, lr_0 = 7.2657e-04
Loss = 4.0132e-02, PNorm = 113.9510, GNorm = 0.3641, lr_0 = 7.2608e-04
Loss = 4.6805e-02, PNorm = 114.0130, GNorm = 0.9829, lr_0 = 7.2558e-04
Loss = 5.3270e-02, PNorm = 114.0870, GNorm = 0.5994, lr_0 = 7.2508e-04
Loss = 4.4077e-02, PNorm = 114.1703, GNorm = 0.7679, lr_0 = 7.2458e-04
Loss = 3.6872e-02, PNorm = 114.2516, GNorm = 0.3320, lr_0 = 7.2409e-04
Loss = 4.1118e-02, PNorm = 114.3197, GNorm = 0.6728, lr_0 = 7.2359e-04
Loss = 3.9338e-02, PNorm = 114.3843, GNorm = 0.3281, lr_0 = 7.2310e-04
Loss = 4.4121e-02, PNorm = 114.4506, GNorm = 0.2753, lr_0 = 7.2260e-04
Loss = 3.7407e-02, PNorm = 114.5179, GNorm = 0.4257, lr_0 = 7.2211e-04
Loss = 4.3913e-02, PNorm = 114.5863, GNorm = 0.3438, lr_0 = 7.2161e-04
Loss = 3.8193e-02, PNorm = 114.6529, GNorm = 0.2594, lr_0 = 7.2112e-04
Loss = 4.1686e-02, PNorm = 114.7252, GNorm = 0.3720, lr_0 = 7.2062e-04
Loss = 4.1446e-02, PNorm = 114.7920, GNorm = 0.7647, lr_0 = 7.2013e-04
Loss = 4.0888e-02, PNorm = 114.8617, GNorm = 0.3267, lr_0 = 7.1964e-04
Validation mae = 0.492006
Epoch 6
Loss = 3.3809e-02, PNorm = 114.9173, GNorm = 0.6906, lr_0 = 7.1914e-04
Loss = 3.0619e-02, PNorm = 114.9628, GNorm = 0.3067, lr_0 = 7.1865e-04
Loss = 3.0115e-02, PNorm = 115.0133, GNorm = 0.6567, lr_0 = 7.1816e-04
Loss = 2.5940e-02, PNorm = 115.0563, GNorm = 0.1965, lr_0 = 7.1767e-04
Loss = 2.6193e-02, PNorm = 115.0955, GNorm = 0.3690, lr_0 = 7.1717e-04
Loss = 2.6656e-02, PNorm = 115.1308, GNorm = 0.2947, lr_0 = 7.1668e-04
Loss = 2.6053e-02, PNorm = 115.1719, GNorm = 0.3832, lr_0 = 7.1619e-04
Loss = 2.8943e-02, PNorm = 115.2069, GNorm = 0.9735, lr_0 = 7.1570e-04
Loss = 2.8007e-02, PNorm = 115.2487, GNorm = 0.5465, lr_0 = 7.1521e-04
Loss = 3.1573e-02, PNorm = 115.3006, GNorm = 0.2141, lr_0 = 7.1472e-04
Loss = 3.0939e-02, PNorm = 115.3466, GNorm = 0.2898, lr_0 = 7.1423e-04
Loss = 2.9899e-02, PNorm = 115.3969, GNorm = 0.3831, lr_0 = 7.1374e-04
Loss = 2.8353e-02, PNorm = 115.4400, GNorm = 0.3587, lr_0 = 7.1325e-04
Loss = 2.4395e-02, PNorm = 115.4781, GNorm = 0.2215, lr_0 = 7.1277e-04
Loss = 3.1542e-02, PNorm = 115.5186, GNorm = 0.6824, lr_0 = 7.1228e-04
Loss = 2.6567e-02, PNorm = 115.5656, GNorm = 0.2202, lr_0 = 7.1179e-04
Loss = 2.1951e-02, PNorm = 115.6079, GNorm = 0.4120, lr_0 = 7.1130e-04
Loss = 2.5527e-02, PNorm = 115.6424, GNorm = 0.6836, lr_0 = 7.1081e-04
Loss = 2.9611e-02, PNorm = 115.6848, GNorm = 0.4230, lr_0 = 7.1033e-04
Loss = 2.8453e-02, PNorm = 115.7273, GNorm = 0.2359, lr_0 = 7.0984e-04
Loss = 3.0053e-02, PNorm = 115.7711, GNorm = 0.2212, lr_0 = 7.0935e-04
Loss = 2.9221e-02, PNorm = 115.8112, GNorm = 0.7029, lr_0 = 7.0887e-04
Loss = 2.7320e-02, PNorm = 115.8535, GNorm = 0.5099, lr_0 = 7.0838e-04
Loss = 2.8286e-02, PNorm = 115.8906, GNorm = 0.2742, lr_0 = 7.0790e-04
Loss = 2.6221e-02, PNorm = 115.9344, GNorm = 0.3443, lr_0 = 7.0741e-04
Loss = 3.3386e-02, PNorm = 115.9750, GNorm = 0.6189, lr_0 = 7.0693e-04
Loss = 3.0002e-02, PNorm = 116.0323, GNorm = 0.3244, lr_0 = 7.0644e-04
Loss = 2.7484e-02, PNorm = 116.0844, GNorm = 0.2404, lr_0 = 7.0596e-04
Loss = 3.1596e-02, PNorm = 116.1400, GNorm = 0.6079, lr_0 = 7.0548e-04
Loss = 2.4530e-02, PNorm = 116.1831, GNorm = 0.1788, lr_0 = 7.0499e-04
Loss = 2.7150e-02, PNorm = 116.2290, GNorm = 0.3176, lr_0 = 7.0451e-04
Loss = 3.0065e-02, PNorm = 116.2793, GNorm = 0.5110, lr_0 = 7.0403e-04
Loss = 3.4706e-02, PNorm = 116.3380, GNorm = 0.5109, lr_0 = 7.0354e-04
Loss = 2.7392e-02, PNorm = 116.3883, GNorm = 0.3826, lr_0 = 7.0306e-04
Loss = 2.3529e-02, PNorm = 116.4382, GNorm = 0.3107, lr_0 = 7.0258e-04
Loss = 2.8754e-02, PNorm = 116.4882, GNorm = 0.7138, lr_0 = 7.0210e-04
Loss = 2.4626e-02, PNorm = 116.5356, GNorm = 0.5032, lr_0 = 7.0162e-04
Loss = 2.3579e-02, PNorm = 116.5792, GNorm = 0.2075, lr_0 = 7.0114e-04
Loss = 2.6421e-02, PNorm = 116.6191, GNorm = 0.5350, lr_0 = 7.0066e-04
Loss = 2.8738e-02, PNorm = 116.6616, GNorm = 0.4071, lr_0 = 7.0018e-04
Loss = 2.6704e-02, PNorm = 116.7049, GNorm = 0.4376, lr_0 = 6.9970e-04
Loss = 2.6029e-02, PNorm = 116.7511, GNorm = 0.7149, lr_0 = 6.9922e-04
Loss = 2.6812e-02, PNorm = 116.8003, GNorm = 0.5673, lr_0 = 6.9874e-04
Loss = 3.4348e-02, PNorm = 116.8538, GNorm = 1.0015, lr_0 = 6.9826e-04
Loss = 3.4116e-02, PNorm = 116.9128, GNorm = 0.2494, lr_0 = 6.9778e-04
Loss = 2.9385e-02, PNorm = 116.9623, GNorm = 0.5099, lr_0 = 6.9730e-04
Loss = 3.2958e-02, PNorm = 117.0125, GNorm = 0.7129, lr_0 = 6.9683e-04
Loss = 3.2401e-02, PNorm = 117.0627, GNorm = 0.4225, lr_0 = 6.9635e-04
Loss = 2.7458e-02, PNorm = 117.1113, GNorm = 0.2652, lr_0 = 6.9587e-04
Loss = 3.0064e-02, PNorm = 117.1647, GNorm = 0.5510, lr_0 = 6.9540e-04
Loss = 2.2567e-02, PNorm = 117.2208, GNorm = 0.2653, lr_0 = 6.9492e-04
Loss = 2.5972e-02, PNorm = 117.2675, GNorm = 0.2297, lr_0 = 6.9444e-04
Loss = 3.1595e-02, PNorm = 117.3146, GNorm = 0.7417, lr_0 = 6.9397e-04
Loss = 3.1733e-02, PNorm = 117.3656, GNorm = 0.3764, lr_0 = 6.9349e-04
Loss = 2.5679e-02, PNorm = 117.4226, GNorm = 0.3941, lr_0 = 6.9302e-04
Loss = 2.6610e-02, PNorm = 117.4674, GNorm = 0.6502, lr_0 = 6.9254e-04
Loss = 3.2227e-02, PNorm = 117.5261, GNorm = 0.7562, lr_0 = 6.9207e-04
Loss = 2.8075e-02, PNorm = 117.5795, GNorm = 0.3663, lr_0 = 6.9159e-04
Loss = 3.2311e-02, PNorm = 117.6353, GNorm = 0.6077, lr_0 = 6.9112e-04
Loss = 2.6373e-02, PNorm = 117.6873, GNorm = 0.2705, lr_0 = 6.9065e-04
Loss = 2.8843e-02, PNorm = 117.7352, GNorm = 0.3708, lr_0 = 6.9017e-04
Loss = 3.4998e-02, PNorm = 117.7852, GNorm = 0.7400, lr_0 = 6.8970e-04
Loss = 2.8292e-02, PNorm = 117.8351, GNorm = 0.2617, lr_0 = 6.8923e-04
Loss = 2.8815e-02, PNorm = 117.8862, GNorm = 1.0815, lr_0 = 6.8876e-04
Loss = 2.8760e-02, PNorm = 117.9417, GNorm = 0.4518, lr_0 = 6.8828e-04
Loss = 2.8969e-02, PNorm = 117.9997, GNorm = 0.7763, lr_0 = 6.8781e-04
Loss = 3.0064e-02, PNorm = 118.0539, GNorm = 0.2598, lr_0 = 6.8734e-04
Loss = 2.7824e-02, PNorm = 118.1126, GNorm = 0.5774, lr_0 = 6.8687e-04
Loss = 3.0269e-02, PNorm = 118.1740, GNorm = 0.6022, lr_0 = 6.8640e-04
Loss = 3.0596e-02, PNorm = 118.2304, GNorm = 0.4950, lr_0 = 6.8593e-04
Loss = 3.0802e-02, PNorm = 118.2898, GNorm = 0.9096, lr_0 = 6.8546e-04
Loss = 2.7993e-02, PNorm = 118.3535, GNorm = 0.6563, lr_0 = 6.8499e-04
Loss = 2.6740e-02, PNorm = 118.4064, GNorm = 0.2060, lr_0 = 6.8452e-04
Loss = 2.8483e-02, PNorm = 118.4578, GNorm = 0.4886, lr_0 = 6.8405e-04
Loss = 2.7419e-02, PNorm = 118.5097, GNorm = 0.3312, lr_0 = 6.8358e-04
Loss = 2.5279e-02, PNorm = 118.5629, GNorm = 0.6725, lr_0 = 6.8312e-04
Loss = 3.1188e-02, PNorm = 118.6196, GNorm = 0.6792, lr_0 = 6.8265e-04
Loss = 2.6282e-02, PNorm = 118.6702, GNorm = 0.3084, lr_0 = 6.8218e-04
Loss = 3.4058e-02, PNorm = 118.7260, GNorm = 0.5716, lr_0 = 6.8171e-04
Loss = 2.7889e-02, PNorm = 118.7803, GNorm = 0.7506, lr_0 = 6.8125e-04
Loss = 2.7620e-02, PNorm = 118.8342, GNorm = 0.2710, lr_0 = 6.8078e-04
Loss = 2.9274e-02, PNorm = 118.8876, GNorm = 0.3312, lr_0 = 6.8031e-04
Loss = 3.0851e-02, PNorm = 118.9392, GNorm = 0.5037, lr_0 = 6.7985e-04
Loss = 2.6822e-02, PNorm = 118.9811, GNorm = 0.6421, lr_0 = 6.7938e-04
Loss = 2.9841e-02, PNorm = 119.0335, GNorm = 0.7800, lr_0 = 6.7892e-04
Loss = 2.8976e-02, PNorm = 119.0975, GNorm = 0.3810, lr_0 = 6.7845e-04
Loss = 2.6143e-02, PNorm = 119.1584, GNorm = 0.5069, lr_0 = 6.7799e-04
Loss = 2.7184e-02, PNorm = 119.2120, GNorm = 0.3063, lr_0 = 6.7752e-04
Loss = 2.6019e-02, PNorm = 119.2640, GNorm = 0.3738, lr_0 = 6.7706e-04
Loss = 2.5533e-02, PNorm = 119.3176, GNorm = 0.2235, lr_0 = 6.7659e-04
Loss = 2.6301e-02, PNorm = 119.3698, GNorm = 0.3373, lr_0 = 6.7613e-04
Loss = 3.2557e-02, PNorm = 119.4247, GNorm = 0.2337, lr_0 = 6.7567e-04
Loss = 3.3617e-02, PNorm = 119.4903, GNorm = 0.7169, lr_0 = 6.7520e-04
Loss = 2.9380e-02, PNorm = 119.5516, GNorm = 0.6389, lr_0 = 6.7474e-04
Loss = 2.9793e-02, PNorm = 119.6078, GNorm = 0.7972, lr_0 = 6.7428e-04
Loss = 2.7354e-02, PNorm = 119.6651, GNorm = 0.2320, lr_0 = 6.7382e-04
Loss = 3.5332e-02, PNorm = 119.7238, GNorm = 0.3135, lr_0 = 6.7335e-04
Loss = 3.2640e-02, PNorm = 119.7859, GNorm = 0.5541, lr_0 = 6.7289e-04
Loss = 3.4823e-02, PNorm = 119.8421, GNorm = 0.6507, lr_0 = 6.7243e-04
Loss = 3.2299e-02, PNorm = 119.9098, GNorm = 0.6894, lr_0 = 6.7197e-04
Loss = 2.7602e-02, PNorm = 119.9733, GNorm = 0.4700, lr_0 = 6.7151e-04
Loss = 3.1903e-02, PNorm = 120.0295, GNorm = 0.5525, lr_0 = 6.7105e-04
Loss = 3.2627e-02, PNorm = 120.0926, GNorm = 0.4279, lr_0 = 6.7059e-04
Loss = 2.9457e-02, PNorm = 120.1628, GNorm = 0.5618, lr_0 = 6.7013e-04
Loss = 3.0392e-02, PNorm = 120.2206, GNorm = 0.2269, lr_0 = 6.6967e-04
Loss = 2.8259e-02, PNorm = 120.2732, GNorm = 0.3420, lr_0 = 6.6921e-04
Loss = 3.1295e-02, PNorm = 120.3303, GNorm = 0.2954, lr_0 = 6.6876e-04
Loss = 3.1019e-02, PNorm = 120.3952, GNorm = 0.3197, lr_0 = 6.6830e-04
Loss = 3.0031e-02, PNorm = 120.4529, GNorm = 0.2829, lr_0 = 6.6784e-04
Loss = 3.5683e-02, PNorm = 120.5193, GNorm = 0.2277, lr_0 = 6.6738e-04
Loss = 2.9362e-02, PNorm = 120.5821, GNorm = 0.2641, lr_0 = 6.6693e-04
Loss = 3.5509e-02, PNorm = 120.6428, GNorm = 0.3395, lr_0 = 6.6647e-04
Loss = 3.0577e-02, PNorm = 120.7058, GNorm = 0.3205, lr_0 = 6.6601e-04
Loss = 3.1699e-02, PNorm = 120.7701, GNorm = 0.2070, lr_0 = 6.6556e-04
Loss = 3.1387e-02, PNorm = 120.8258, GNorm = 1.2332, lr_0 = 6.6510e-04
Loss = 3.4831e-02, PNorm = 120.8861, GNorm = 0.4798, lr_0 = 6.6464e-04
Loss = 3.2819e-02, PNorm = 120.9550, GNorm = 0.5504, lr_0 = 6.6419e-04
Loss = 2.5060e-02, PNorm = 121.0201, GNorm = 0.1948, lr_0 = 6.6373e-04
Loss = 3.6745e-02, PNorm = 121.0778, GNorm = 0.4713, lr_0 = 6.6328e-04
Loss = 3.1131e-02, PNorm = 121.1426, GNorm = 0.4643, lr_0 = 6.6282e-04
Validation mae = 0.489469
Epoch 7
Loss = 2.1185e-02, PNorm = 121.1986, GNorm = 0.2515, lr_0 = 6.6237e-04
Loss = 2.4601e-02, PNorm = 121.2454, GNorm = 0.3405, lr_0 = 6.6192e-04
Loss = 2.3943e-02, PNorm = 121.2848, GNorm = 0.6454, lr_0 = 6.6146e-04
Loss = 2.6336e-02, PNorm = 121.3208, GNorm = 0.2147, lr_0 = 6.6101e-04
Loss = 2.6626e-02, PNorm = 121.3633, GNorm = 0.4175, lr_0 = 6.6056e-04
Loss = 2.1707e-02, PNorm = 121.4021, GNorm = 0.2016, lr_0 = 6.6011e-04
Loss = 2.4922e-02, PNorm = 121.4426, GNorm = 0.6523, lr_0 = 6.5965e-04
Loss = 2.1043e-02, PNorm = 121.4800, GNorm = 0.2406, lr_0 = 6.5920e-04
Loss = 1.9623e-02, PNorm = 121.5190, GNorm = 0.5012, lr_0 = 6.5875e-04
Loss = 2.2990e-02, PNorm = 121.5549, GNorm = 0.3236, lr_0 = 6.5830e-04
Loss = 2.0856e-02, PNorm = 121.5958, GNorm = 0.4369, lr_0 = 6.5785e-04
Loss = 2.4343e-02, PNorm = 121.6361, GNorm = 0.4249, lr_0 = 6.5740e-04
Loss = 2.1667e-02, PNorm = 121.6761, GNorm = 0.3943, lr_0 = 6.5695e-04
Loss = 2.1197e-02, PNorm = 121.7178, GNorm = 0.4679, lr_0 = 6.5650e-04
Loss = 2.1623e-02, PNorm = 121.7466, GNorm = 0.2774, lr_0 = 6.5605e-04
Loss = 2.0334e-02, PNorm = 121.7756, GNorm = 0.2690, lr_0 = 6.5560e-04
Loss = 1.9973e-02, PNorm = 121.8068, GNorm = 0.2749, lr_0 = 6.5515e-04
Loss = 1.8369e-02, PNorm = 121.8456, GNorm = 0.2152, lr_0 = 6.5470e-04
Loss = 2.2734e-02, PNorm = 121.8875, GNorm = 0.3385, lr_0 = 6.5425e-04
Loss = 2.1733e-02, PNorm = 121.9271, GNorm = 0.3258, lr_0 = 6.5380e-04
Loss = 1.9698e-02, PNorm = 121.9615, GNorm = 0.4645, lr_0 = 6.5335e-04
Loss = 2.3710e-02, PNorm = 122.0022, GNorm = 0.8207, lr_0 = 6.5291e-04
Loss = 2.1232e-02, PNorm = 122.0373, GNorm = 0.2775, lr_0 = 6.5246e-04
Loss = 2.0476e-02, PNorm = 122.0766, GNorm = 0.2111, lr_0 = 6.5201e-04
Loss = 2.2058e-02, PNorm = 122.1126, GNorm = 0.3414, lr_0 = 6.5157e-04
Loss = 2.6145e-02, PNorm = 122.1500, GNorm = 0.5619, lr_0 = 6.5112e-04
Loss = 1.8299e-02, PNorm = 122.1909, GNorm = 0.5925, lr_0 = 6.5067e-04
Loss = 2.3734e-02, PNorm = 122.2335, GNorm = 0.2819, lr_0 = 6.5023e-04
Loss = 2.6024e-02, PNorm = 122.2725, GNorm = 0.4310, lr_0 = 6.4978e-04
Loss = 2.1786e-02, PNorm = 122.3156, GNorm = 0.3585, lr_0 = 6.4934e-04
Loss = 2.3468e-02, PNorm = 122.3541, GNorm = 0.4258, lr_0 = 6.4889e-04
Loss = 2.3030e-02, PNorm = 122.3941, GNorm = 0.2148, lr_0 = 6.4845e-04
Loss = 2.4379e-02, PNorm = 122.4417, GNorm = 0.4347, lr_0 = 6.4800e-04
Loss = 2.7763e-02, PNorm = 122.4967, GNorm = 0.2093, lr_0 = 6.4756e-04
Loss = 2.8913e-02, PNorm = 122.5507, GNorm = 0.2964, lr_0 = 6.4712e-04
Loss = 2.1898e-02, PNorm = 122.5879, GNorm = 0.4368, lr_0 = 6.4667e-04
Loss = 2.5997e-02, PNorm = 122.6252, GNorm = 0.5674, lr_0 = 6.4623e-04
Loss = 2.2922e-02, PNorm = 122.6639, GNorm = 0.3314, lr_0 = 6.4579e-04
Loss = 2.5178e-02, PNorm = 122.7087, GNorm = 0.4471, lr_0 = 6.4534e-04
Loss = 2.2947e-02, PNorm = 122.7535, GNorm = 1.1450, lr_0 = 6.4490e-04
Loss = 2.3353e-02, PNorm = 122.7951, GNorm = 0.4549, lr_0 = 6.4446e-04
Loss = 2.2782e-02, PNorm = 122.8358, GNorm = 0.2975, lr_0 = 6.4402e-04
Loss = 2.1141e-02, PNorm = 122.8732, GNorm = 0.1984, lr_0 = 6.4358e-04
Loss = 2.4317e-02, PNorm = 122.9192, GNorm = 0.2693, lr_0 = 6.4314e-04
Loss = 1.9180e-02, PNorm = 122.9641, GNorm = 0.4909, lr_0 = 6.4270e-04
Loss = 2.6408e-02, PNorm = 123.0062, GNorm = 0.4437, lr_0 = 6.4226e-04
Loss = 2.1231e-02, PNorm = 123.0503, GNorm = 0.3366, lr_0 = 6.4182e-04
Loss = 2.1870e-02, PNorm = 123.0898, GNorm = 0.2236, lr_0 = 6.4138e-04
Loss = 2.0876e-02, PNorm = 123.1367, GNorm = 0.1702, lr_0 = 6.4094e-04
Loss = 2.2338e-02, PNorm = 123.1850, GNorm = 0.1765, lr_0 = 6.4050e-04
Loss = 2.3007e-02, PNorm = 123.2311, GNorm = 0.6860, lr_0 = 6.4006e-04
Loss = 2.3338e-02, PNorm = 123.2785, GNorm = 0.3582, lr_0 = 6.3962e-04
Loss = 2.2045e-02, PNorm = 123.3263, GNorm = 0.3964, lr_0 = 6.3918e-04
Loss = 2.1163e-02, PNorm = 123.3729, GNorm = 0.3164, lr_0 = 6.3874e-04
Loss = 2.7941e-02, PNorm = 123.4208, GNorm = 0.7834, lr_0 = 6.3831e-04
Loss = 2.1572e-02, PNorm = 123.4700, GNorm = 0.2296, lr_0 = 6.3787e-04
Loss = 2.1925e-02, PNorm = 123.5152, GNorm = 0.7306, lr_0 = 6.3743e-04
Loss = 2.4450e-02, PNorm = 123.5645, GNorm = 0.3231, lr_0 = 6.3700e-04
Loss = 2.0403e-02, PNorm = 123.6196, GNorm = 0.3136, lr_0 = 6.3656e-04
Loss = 2.3915e-02, PNorm = 123.6657, GNorm = 0.1813, lr_0 = 6.3612e-04
Loss = 2.3636e-02, PNorm = 123.7051, GNorm = 0.4101, lr_0 = 6.3569e-04
Loss = 2.3576e-02, PNorm = 123.7470, GNorm = 0.3739, lr_0 = 6.3525e-04
Loss = 2.4858e-02, PNorm = 123.7894, GNorm = 0.2930, lr_0 = 6.3482e-04
Loss = 2.0593e-02, PNorm = 123.8316, GNorm = 0.4736, lr_0 = 6.3438e-04
Loss = 2.1684e-02, PNorm = 123.8732, GNorm = 0.3475, lr_0 = 6.3395e-04
Loss = 2.0895e-02, PNorm = 123.9177, GNorm = 0.2652, lr_0 = 6.3351e-04
Loss = 2.2122e-02, PNorm = 123.9673, GNorm = 0.2625, lr_0 = 6.3308e-04
Loss = 2.4012e-02, PNorm = 124.0179, GNorm = 0.4225, lr_0 = 6.3265e-04
Loss = 2.2229e-02, PNorm = 124.0660, GNorm = 0.3913, lr_0 = 6.3221e-04
Loss = 2.3395e-02, PNorm = 124.1159, GNorm = 0.5067, lr_0 = 6.3178e-04
Loss = 2.6294e-02, PNorm = 124.1664, GNorm = 0.8121, lr_0 = 6.3135e-04
Loss = 2.4715e-02, PNorm = 124.2169, GNorm = 0.7184, lr_0 = 6.3091e-04
Loss = 2.3942e-02, PNorm = 124.2636, GNorm = 0.3511, lr_0 = 6.3048e-04
Loss = 2.3378e-02, PNorm = 124.3109, GNorm = 0.3977, lr_0 = 6.3005e-04
Loss = 2.0884e-02, PNorm = 124.3546, GNorm = 0.3413, lr_0 = 6.2962e-04
Loss = 2.3898e-02, PNorm = 124.3971, GNorm = 0.2561, lr_0 = 6.2919e-04
Loss = 2.3506e-02, PNorm = 124.4419, GNorm = 0.4434, lr_0 = 6.2876e-04
Loss = 2.5084e-02, PNorm = 124.4899, GNorm = 0.3796, lr_0 = 6.2833e-04
Loss = 2.3573e-02, PNorm = 124.5416, GNorm = 0.1960, lr_0 = 6.2789e-04
Loss = 2.2952e-02, PNorm = 124.5896, GNorm = 0.2444, lr_0 = 6.2746e-04
Loss = 2.5444e-02, PNorm = 124.6353, GNorm = 0.3710, lr_0 = 6.2703e-04
Loss = 2.3356e-02, PNorm = 124.6920, GNorm = 0.4261, lr_0 = 6.2661e-04
Loss = 2.3504e-02, PNorm = 124.7388, GNorm = 0.9784, lr_0 = 6.2618e-04
Loss = 2.5547e-02, PNorm = 124.7937, GNorm = 0.3535, lr_0 = 6.2575e-04
Loss = 2.1978e-02, PNorm = 124.8433, GNorm = 0.4445, lr_0 = 6.2532e-04
Loss = 2.1279e-02, PNorm = 124.8931, GNorm = 0.6667, lr_0 = 6.2489e-04
Loss = 2.4941e-02, PNorm = 124.9429, GNorm = 0.2827, lr_0 = 6.2446e-04
Loss = 2.0118e-02, PNorm = 124.9916, GNorm = 0.4870, lr_0 = 6.2403e-04
Loss = 2.2797e-02, PNorm = 125.0331, GNorm = 0.8486, lr_0 = 6.2361e-04
Loss = 2.3039e-02, PNorm = 125.0738, GNorm = 0.3084, lr_0 = 6.2318e-04
Loss = 2.2316e-02, PNorm = 125.1217, GNorm = 0.5652, lr_0 = 6.2275e-04
Loss = 2.0412e-02, PNorm = 125.1713, GNorm = 0.3721, lr_0 = 6.2233e-04
Loss = 2.2209e-02, PNorm = 125.2166, GNorm = 0.3422, lr_0 = 6.2190e-04
Loss = 2.2883e-02, PNorm = 125.2659, GNorm = 0.3888, lr_0 = 6.2147e-04
Loss = 2.2271e-02, PNorm = 125.3149, GNorm = 1.0018, lr_0 = 6.2105e-04
Loss = 2.0993e-02, PNorm = 125.3592, GNorm = 0.8506, lr_0 = 6.2062e-04
Loss = 1.8498e-02, PNorm = 125.4049, GNorm = 0.2098, lr_0 = 6.2020e-04
Loss = 2.5003e-02, PNorm = 125.4467, GNorm = 0.2268, lr_0 = 6.1977e-04
Loss = 2.6529e-02, PNorm = 125.4859, GNorm = 0.6326, lr_0 = 6.1935e-04
Loss = 1.9331e-02, PNorm = 125.5361, GNorm = 0.3113, lr_0 = 6.1892e-04
Loss = 2.1365e-02, PNorm = 125.5835, GNorm = 0.5586, lr_0 = 6.1850e-04
Loss = 2.4062e-02, PNorm = 125.6267, GNorm = 0.1698, lr_0 = 6.1808e-04
Loss = 2.4389e-02, PNorm = 125.6736, GNorm = 0.2161, lr_0 = 6.1765e-04
Loss = 2.3114e-02, PNorm = 125.7250, GNorm = 0.4107, lr_0 = 6.1723e-04
Loss = 2.3204e-02, PNorm = 125.7779, GNorm = 0.9082, lr_0 = 6.1681e-04
Loss = 2.1664e-02, PNorm = 125.8283, GNorm = 0.3344, lr_0 = 6.1638e-04
Loss = 2.4657e-02, PNorm = 125.8807, GNorm = 0.4986, lr_0 = 6.1596e-04
Loss = 2.1301e-02, PNorm = 125.9311, GNorm = 0.1868, lr_0 = 6.1554e-04
Loss = 2.3379e-02, PNorm = 125.9819, GNorm = 0.2082, lr_0 = 6.1512e-04
Loss = 2.2649e-02, PNorm = 126.0291, GNorm = 0.4962, lr_0 = 6.1470e-04
Loss = 2.5645e-02, PNorm = 126.0794, GNorm = 0.2976, lr_0 = 6.1428e-04
Loss = 2.1679e-02, PNorm = 126.1320, GNorm = 0.2059, lr_0 = 6.1385e-04
Loss = 3.3143e-02, PNorm = 126.1801, GNorm = 0.2848, lr_0 = 6.1343e-04
Loss = 1.9421e-02, PNorm = 126.2322, GNorm = 0.2788, lr_0 = 6.1301e-04
Loss = 2.7227e-02, PNorm = 126.2802, GNorm = 0.5521, lr_0 = 6.1259e-04
Loss = 1.9960e-02, PNorm = 126.3266, GNorm = 0.4329, lr_0 = 6.1217e-04
Loss = 2.3940e-02, PNorm = 126.3710, GNorm = 0.6552, lr_0 = 6.1175e-04
Loss = 2.5402e-02, PNorm = 126.4195, GNorm = 0.5743, lr_0 = 6.1134e-04
Loss = 2.2900e-02, PNorm = 126.4773, GNorm = 0.2159, lr_0 = 6.1092e-04
Loss = 2.2690e-02, PNorm = 126.5306, GNorm = 0.7831, lr_0 = 6.1050e-04
Validation mae = 0.488738
Epoch 8
Loss = 2.1162e-02, PNorm = 126.5719, GNorm = 0.3223, lr_0 = 6.1008e-04
Loss = 2.0464e-02, PNorm = 126.6053, GNorm = 0.2978, lr_0 = 6.0966e-04
Loss = 1.8766e-02, PNorm = 126.6381, GNorm = 0.3067, lr_0 = 6.0924e-04
Loss = 2.0635e-02, PNorm = 126.6756, GNorm = 0.9019, lr_0 = 6.0883e-04
Loss = 1.8454e-02, PNorm = 126.7064, GNorm = 0.3510, lr_0 = 6.0841e-04
Loss = 2.1863e-02, PNorm = 126.7421, GNorm = 0.4843, lr_0 = 6.0799e-04
Loss = 1.8734e-02, PNorm = 126.7782, GNorm = 0.3004, lr_0 = 6.0758e-04
Loss = 1.3995e-02, PNorm = 126.8171, GNorm = 0.3196, lr_0 = 6.0716e-04
Loss = 1.9735e-02, PNorm = 126.8506, GNorm = 0.4707, lr_0 = 6.0674e-04
Loss = 1.6444e-02, PNorm = 126.8789, GNorm = 0.4861, lr_0 = 6.0633e-04
Loss = 1.6541e-02, PNorm = 126.9073, GNorm = 0.3583, lr_0 = 6.0591e-04
Loss = 1.7820e-02, PNorm = 126.9373, GNorm = 0.2656, lr_0 = 6.0550e-04
Loss = 2.1116e-02, PNorm = 126.9667, GNorm = 0.1780, lr_0 = 6.0508e-04
Loss = 1.8372e-02, PNorm = 126.9924, GNorm = 0.2224, lr_0 = 6.0467e-04
Loss = 2.2125e-02, PNorm = 127.0292, GNorm = 0.2297, lr_0 = 6.0425e-04
Loss = 1.8723e-02, PNorm = 127.0714, GNorm = 0.4668, lr_0 = 6.0384e-04
Loss = 1.8041e-02, PNorm = 127.1059, GNorm = 0.3990, lr_0 = 6.0343e-04
Loss = 1.9577e-02, PNorm = 127.1391, GNorm = 0.2155, lr_0 = 6.0301e-04
Loss = 1.7219e-02, PNorm = 127.1710, GNorm = 0.3583, lr_0 = 6.0260e-04
Loss = 2.0894e-02, PNorm = 127.2056, GNorm = 1.0853, lr_0 = 6.0219e-04
Loss = 1.8494e-02, PNorm = 127.2401, GNorm = 0.2097, lr_0 = 6.0178e-04
Loss = 1.6856e-02, PNorm = 127.2760, GNorm = 0.1379, lr_0 = 6.0136e-04
Loss = 1.8335e-02, PNorm = 127.3113, GNorm = 0.1947, lr_0 = 6.0095e-04
Loss = 1.6355e-02, PNorm = 127.3474, GNorm = 0.1900, lr_0 = 6.0054e-04
Loss = 1.7994e-02, PNorm = 127.3805, GNorm = 0.2033, lr_0 = 6.0013e-04
Loss = 1.9348e-02, PNorm = 127.4085, GNorm = 0.3916, lr_0 = 5.9972e-04
Loss = 1.8307e-02, PNorm = 127.4468, GNorm = 0.2115, lr_0 = 5.9931e-04
Loss = 1.9890e-02, PNorm = 127.4835, GNorm = 0.2041, lr_0 = 5.9890e-04
Loss = 1.9669e-02, PNorm = 127.5215, GNorm = 0.6170, lr_0 = 5.9849e-04
Loss = 1.6971e-02, PNorm = 127.5592, GNorm = 0.4563, lr_0 = 5.9808e-04
Loss = 1.6887e-02, PNorm = 127.5958, GNorm = 0.2494, lr_0 = 5.9767e-04
Loss = 1.6402e-02, PNorm = 127.6304, GNorm = 0.2209, lr_0 = 5.9726e-04
Loss = 1.3809e-02, PNorm = 127.6625, GNorm = 0.4020, lr_0 = 5.9685e-04
Loss = 1.6415e-02, PNorm = 127.6926, GNorm = 0.2015, lr_0 = 5.9644e-04
Loss = 1.6289e-02, PNorm = 127.7249, GNorm = 0.3691, lr_0 = 5.9603e-04
Loss = 2.1508e-02, PNorm = 127.7570, GNorm = 0.1977, lr_0 = 5.9562e-04
Loss = 1.8849e-02, PNorm = 127.7889, GNorm = 0.3201, lr_0 = 5.9521e-04
Loss = 2.0420e-02, PNorm = 127.8248, GNorm = 0.4892, lr_0 = 5.9481e-04
Loss = 1.7762e-02, PNorm = 127.8615, GNorm = 0.2293, lr_0 = 5.9440e-04
Loss = 1.8895e-02, PNorm = 127.8974, GNorm = 0.2340, lr_0 = 5.9399e-04
Loss = 1.5887e-02, PNorm = 127.9351, GNorm = 0.4119, lr_0 = 5.9358e-04
Loss = 1.4447e-02, PNorm = 127.9679, GNorm = 0.2632, lr_0 = 5.9318e-04
Loss = 1.7864e-02, PNorm = 128.0084, GNorm = 0.4943, lr_0 = 5.9277e-04
Loss = 1.8409e-02, PNorm = 128.0433, GNorm = 0.3599, lr_0 = 5.9236e-04
Loss = 1.6565e-02, PNorm = 128.0718, GNorm = 0.3433, lr_0 = 5.9196e-04
Loss = 1.9344e-02, PNorm = 128.1040, GNorm = 0.2747, lr_0 = 5.9155e-04
Loss = 1.7531e-02, PNorm = 128.1401, GNorm = 0.2789, lr_0 = 5.9115e-04
Loss = 1.6403e-02, PNorm = 128.1691, GNorm = 0.2812, lr_0 = 5.9074e-04
Loss = 2.3523e-02, PNorm = 128.2029, GNorm = 0.3516, lr_0 = 5.9034e-04
Loss = 1.7774e-02, PNorm = 128.2472, GNorm = 0.9274, lr_0 = 5.8993e-04
Loss = 2.2508e-02, PNorm = 128.2931, GNorm = 0.2133, lr_0 = 5.8953e-04
Loss = 1.6832e-02, PNorm = 128.3312, GNorm = 0.3311, lr_0 = 5.8913e-04
Loss = 2.2687e-02, PNorm = 128.3696, GNorm = 0.2855, lr_0 = 5.8872e-04
Loss = 1.4539e-02, PNorm = 128.4005, GNorm = 0.2248, lr_0 = 5.8832e-04
Loss = 1.7419e-02, PNorm = 128.4372, GNorm = 0.2339, lr_0 = 5.8792e-04
Loss = 1.9234e-02, PNorm = 128.4772, GNorm = 0.4320, lr_0 = 5.8751e-04
Loss = 2.3844e-02, PNorm = 128.5137, GNorm = 0.3949, lr_0 = 5.8711e-04
Loss = 1.8150e-02, PNorm = 128.5556, GNorm = 0.3153, lr_0 = 5.8671e-04
Loss = 1.5350e-02, PNorm = 128.5904, GNorm = 0.2539, lr_0 = 5.8631e-04
Loss = 1.6890e-02, PNorm = 128.6277, GNorm = 0.3253, lr_0 = 5.8591e-04
Loss = 1.7342e-02, PNorm = 128.6673, GNorm = 0.3279, lr_0 = 5.8550e-04
Loss = 1.6979e-02, PNorm = 128.7075, GNorm = 0.1737, lr_0 = 5.8510e-04
Loss = 2.0175e-02, PNorm = 128.7509, GNorm = 0.4822, lr_0 = 5.8470e-04
Loss = 1.6192e-02, PNorm = 128.7856, GNorm = 0.2690, lr_0 = 5.8430e-04
Loss = 1.7241e-02, PNorm = 128.8172, GNorm = 0.4754, lr_0 = 5.8390e-04
Loss = 1.9690e-02, PNorm = 128.8531, GNorm = 0.4716, lr_0 = 5.8350e-04
Loss = 1.6558e-02, PNorm = 128.8943, GNorm = 0.3959, lr_0 = 5.8310e-04
Loss = 1.6368e-02, PNorm = 128.9337, GNorm = 0.2957, lr_0 = 5.8270e-04
Loss = 1.5852e-02, PNorm = 128.9707, GNorm = 0.1707, lr_0 = 5.8230e-04
Loss = 1.8211e-02, PNorm = 129.0123, GNorm = 0.2259, lr_0 = 5.8190e-04
Loss = 1.7853e-02, PNorm = 129.0548, GNorm = 0.1959, lr_0 = 5.8151e-04
Loss = 2.0330e-02, PNorm = 129.0952, GNorm = 0.6776, lr_0 = 5.8111e-04
Loss = 1.7415e-02, PNorm = 129.1349, GNorm = 0.2748, lr_0 = 5.8071e-04
Loss = 1.6715e-02, PNorm = 129.1733, GNorm = 0.6159, lr_0 = 5.8031e-04
Loss = 1.6420e-02, PNorm = 129.2174, GNorm = 0.2983, lr_0 = 5.7991e-04
Loss = 1.5053e-02, PNorm = 129.2598, GNorm = 0.3526, lr_0 = 5.7952e-04
Loss = 1.7577e-02, PNorm = 129.3012, GNorm = 0.3151, lr_0 = 5.7912e-04
Loss = 1.5907e-02, PNorm = 129.3355, GNorm = 0.2750, lr_0 = 5.7872e-04
Loss = 2.0167e-02, PNorm = 129.3680, GNorm = 0.4965, lr_0 = 5.7833e-04
Loss = 1.6469e-02, PNorm = 129.4079, GNorm = 0.4243, lr_0 = 5.7793e-04
Loss = 1.7700e-02, PNorm = 129.4417, GNorm = 0.6157, lr_0 = 5.7753e-04
Loss = 1.9276e-02, PNorm = 129.4822, GNorm = 0.7084, lr_0 = 5.7714e-04
Loss = 1.9992e-02, PNorm = 129.5176, GNorm = 0.3504, lr_0 = 5.7674e-04
Loss = 1.6554e-02, PNorm = 129.5513, GNorm = 0.2772, lr_0 = 5.7635e-04
Loss = 2.0432e-02, PNorm = 129.5950, GNorm = 0.4756, lr_0 = 5.7595e-04
Loss = 1.8375e-02, PNorm = 129.6285, GNorm = 0.4211, lr_0 = 5.7556e-04
Loss = 1.9530e-02, PNorm = 129.6806, GNorm = 0.1408, lr_0 = 5.7516e-04
Loss = 2.1883e-02, PNorm = 129.7236, GNorm = 0.2376, lr_0 = 5.7477e-04
Loss = 1.7425e-02, PNorm = 129.7643, GNorm = 0.2539, lr_0 = 5.7438e-04
Loss = 1.6676e-02, PNorm = 129.8070, GNorm = 0.6493, lr_0 = 5.7398e-04
Loss = 1.9244e-02, PNorm = 129.8499, GNorm = 0.2900, lr_0 = 5.7359e-04
Loss = 1.9440e-02, PNorm = 129.8939, GNorm = 0.3637, lr_0 = 5.7320e-04
Loss = 2.3237e-02, PNorm = 129.9342, GNorm = 0.3085, lr_0 = 5.7280e-04
Loss = 1.9736e-02, PNorm = 129.9822, GNorm = 0.2427, lr_0 = 5.7241e-04
Loss = 2.0463e-02, PNorm = 130.0254, GNorm = 0.1628, lr_0 = 5.7202e-04
Loss = 1.6132e-02, PNorm = 130.0677, GNorm = 0.2437, lr_0 = 5.7163e-04
Loss = 1.8856e-02, PNorm = 130.1005, GNorm = 0.4864, lr_0 = 5.7124e-04
Loss = 1.9007e-02, PNorm = 130.1349, GNorm = 0.3720, lr_0 = 5.7084e-04
Loss = 1.9154e-02, PNorm = 130.1764, GNorm = 0.1947, lr_0 = 5.7045e-04
Loss = 1.7988e-02, PNorm = 130.2174, GNorm = 0.3492, lr_0 = 5.7006e-04
Loss = 1.7322e-02, PNorm = 130.2561, GNorm = 0.4938, lr_0 = 5.6967e-04
Loss = 1.6905e-02, PNorm = 130.2943, GNorm = 0.3131, lr_0 = 5.6928e-04
Loss = 1.8264e-02, PNorm = 130.3373, GNorm = 0.1591, lr_0 = 5.6889e-04
Loss = 1.8416e-02, PNorm = 130.3839, GNorm = 0.4131, lr_0 = 5.6850e-04
Loss = 2.4653e-02, PNorm = 130.4246, GNorm = 0.5498, lr_0 = 5.6811e-04
Loss = 1.8375e-02, PNorm = 130.4709, GNorm = 0.2065, lr_0 = 5.6772e-04
Loss = 2.1285e-02, PNorm = 130.5225, GNorm = 0.1698, lr_0 = 5.6733e-04
Loss = 2.0386e-02, PNorm = 130.5682, GNorm = 0.3050, lr_0 = 5.6695e-04
Loss = 2.0773e-02, PNorm = 130.6130, GNorm = 0.2159, lr_0 = 5.6656e-04
Loss = 1.7921e-02, PNorm = 130.6563, GNorm = 0.1841, lr_0 = 5.6617e-04
Loss = 1.9093e-02, PNorm = 130.7003, GNorm = 0.2131, lr_0 = 5.6578e-04
Loss = 1.8864e-02, PNorm = 130.7382, GNorm = 0.4473, lr_0 = 5.6539e-04
Loss = 1.6412e-02, PNorm = 130.7834, GNorm = 0.2387, lr_0 = 5.6501e-04
Loss = 1.8642e-02, PNorm = 130.8215, GNorm = 0.4276, lr_0 = 5.6462e-04
Loss = 2.0236e-02, PNorm = 130.8640, GNorm = 0.4501, lr_0 = 5.6423e-04
Loss = 2.0091e-02, PNorm = 130.9113, GNorm = 0.2371, lr_0 = 5.6385e-04
Loss = 1.7838e-02, PNorm = 130.9573, GNorm = 0.1355, lr_0 = 5.6346e-04
Loss = 2.0254e-02, PNorm = 131.0024, GNorm = 0.2281, lr_0 = 5.6307e-04
Loss = 1.9793e-02, PNorm = 131.0456, GNorm = 0.2259, lr_0 = 5.6269e-04
Loss = 1.8965e-02, PNorm = 131.0914, GNorm = 0.2709, lr_0 = 5.6230e-04
Validation mae = 0.487671
Epoch 9
Loss = 1.6459e-02, PNorm = 131.1294, GNorm = 0.1819, lr_0 = 5.6192e-04
Loss = 1.5226e-02, PNorm = 131.1560, GNorm = 0.2167, lr_0 = 5.6153e-04
Loss = 1.8171e-02, PNorm = 131.1853, GNorm = 0.3320, lr_0 = 5.6115e-04
Loss = 1.5885e-02, PNorm = 131.2122, GNorm = 0.4225, lr_0 = 5.6076e-04
Loss = 1.4332e-02, PNorm = 131.2361, GNorm = 0.1589, lr_0 = 5.6038e-04
Loss = 1.6242e-02, PNorm = 131.2617, GNorm = 0.3352, lr_0 = 5.6000e-04
Loss = 1.5330e-02, PNorm = 131.2925, GNorm = 0.3242, lr_0 = 5.5961e-04
Loss = 1.6171e-02, PNorm = 131.3244, GNorm = 0.3056, lr_0 = 5.5923e-04
Loss = 1.5870e-02, PNorm = 131.3486, GNorm = 0.1873, lr_0 = 5.5885e-04
Loss = 1.3556e-02, PNorm = 131.3768, GNorm = 0.4075, lr_0 = 5.5846e-04
Loss = 1.5889e-02, PNorm = 131.3967, GNorm = 0.2876, lr_0 = 5.5808e-04
Loss = 1.7303e-02, PNorm = 131.4223, GNorm = 0.5463, lr_0 = 5.5770e-04
Loss = 1.4031e-02, PNorm = 131.4535, GNorm = 0.2355, lr_0 = 5.5732e-04
Loss = 1.4830e-02, PNorm = 131.4799, GNorm = 0.4284, lr_0 = 5.5693e-04
Loss = 1.4250e-02, PNorm = 131.5073, GNorm = 0.2478, lr_0 = 5.5655e-04
Loss = 1.3163e-02, PNorm = 131.5344, GNorm = 0.3031, lr_0 = 5.5617e-04
Loss = 1.2586e-02, PNorm = 131.5620, GNorm = 0.1712, lr_0 = 5.5579e-04
Loss = 1.3279e-02, PNorm = 131.5897, GNorm = 0.9029, lr_0 = 5.5541e-04
Loss = 1.4357e-02, PNorm = 131.6207, GNorm = 0.2521, lr_0 = 5.5503e-04
Loss = 1.6318e-02, PNorm = 131.6492, GNorm = 0.3777, lr_0 = 5.5465e-04
Loss = 1.2842e-02, PNorm = 131.6785, GNorm = 0.1945, lr_0 = 5.5427e-04
Loss = 1.5635e-02, PNorm = 131.7078, GNorm = 0.1342, lr_0 = 5.5389e-04
Loss = 1.5920e-02, PNorm = 131.7348, GNorm = 0.1831, lr_0 = 5.5351e-04
Loss = 1.2855e-02, PNorm = 131.7641, GNorm = 0.3497, lr_0 = 5.5313e-04
Loss = 1.4793e-02, PNorm = 131.7931, GNorm = 0.1786, lr_0 = 5.5275e-04
Loss = 1.5639e-02, PNorm = 131.8255, GNorm = 0.2290, lr_0 = 5.5237e-04
Loss = 1.5064e-02, PNorm = 131.8539, GNorm = 0.7056, lr_0 = 5.5199e-04
Loss = 1.3688e-02, PNorm = 131.8838, GNorm = 0.3069, lr_0 = 5.5162e-04
Loss = 1.5147e-02, PNorm = 131.9126, GNorm = 0.4705, lr_0 = 5.5124e-04
Loss = 1.6759e-02, PNorm = 131.9401, GNorm = 0.1642, lr_0 = 5.5086e-04
Loss = 1.4243e-02, PNorm = 131.9670, GNorm = 0.1605, lr_0 = 5.5048e-04
Loss = 1.2889e-02, PNorm = 131.9978, GNorm = 0.3003, lr_0 = 5.5011e-04
Loss = 1.3821e-02, PNorm = 132.0252, GNorm = 0.4230, lr_0 = 5.4973e-04
Loss = 1.6112e-02, PNorm = 132.0513, GNorm = 0.7406, lr_0 = 5.4935e-04
Loss = 1.4551e-02, PNorm = 132.0816, GNorm = 0.4168, lr_0 = 5.4898e-04
Loss = 1.4254e-02, PNorm = 132.1133, GNorm = 0.3928, lr_0 = 5.4860e-04
Loss = 1.3782e-02, PNorm = 132.1458, GNorm = 0.1964, lr_0 = 5.4822e-04
Loss = 1.4856e-02, PNorm = 132.1785, GNorm = 0.3245, lr_0 = 5.4785e-04
Loss = 1.5868e-02, PNorm = 132.2087, GNorm = 0.2731, lr_0 = 5.4747e-04
Loss = 1.3958e-02, PNorm = 132.2425, GNorm = 0.2598, lr_0 = 5.4710e-04
Loss = 1.2847e-02, PNorm = 132.2735, GNorm = 0.3676, lr_0 = 5.4672e-04
Loss = 1.2935e-02, PNorm = 132.3013, GNorm = 0.1808, lr_0 = 5.4635e-04
Loss = 1.4463e-02, PNorm = 132.3338, GNorm = 0.2301, lr_0 = 5.4597e-04
Loss = 1.6079e-02, PNorm = 132.3723, GNorm = 0.8004, lr_0 = 5.4560e-04
Loss = 1.8781e-02, PNorm = 132.4029, GNorm = 0.3619, lr_0 = 5.4523e-04
Loss = 1.6262e-02, PNorm = 132.4424, GNorm = 0.6548, lr_0 = 5.4485e-04
Loss = 1.3822e-02, PNorm = 132.4822, GNorm = 0.5533, lr_0 = 5.4448e-04
Loss = 1.6402e-02, PNorm = 132.5183, GNorm = 0.4229, lr_0 = 5.4411e-04
Loss = 1.2736e-02, PNorm = 132.5537, GNorm = 0.2937, lr_0 = 5.4373e-04
Loss = 1.2417e-02, PNorm = 132.5823, GNorm = 0.1054, lr_0 = 5.4336e-04
Loss = 1.4008e-02, PNorm = 132.6092, GNorm = 0.2789, lr_0 = 5.4299e-04
Loss = 1.6345e-02, PNorm = 132.6376, GNorm = 0.3163, lr_0 = 5.4262e-04
Loss = 1.6701e-02, PNorm = 132.6761, GNorm = 0.1790, lr_0 = 5.4225e-04
Loss = 1.3858e-02, PNorm = 132.7108, GNorm = 0.2189, lr_0 = 5.4187e-04
Loss = 1.3816e-02, PNorm = 132.7447, GNorm = 0.4323, lr_0 = 5.4150e-04
Loss = 1.5097e-02, PNorm = 132.7757, GNorm = 0.5654, lr_0 = 5.4113e-04
Loss = 1.5045e-02, PNorm = 132.8071, GNorm = 0.1567, lr_0 = 5.4076e-04
Loss = 1.7808e-02, PNorm = 132.8409, GNorm = 0.5691, lr_0 = 5.4039e-04
Loss = 1.5146e-02, PNorm = 132.8782, GNorm = 0.6590, lr_0 = 5.4002e-04
Loss = 1.3309e-02, PNorm = 132.9118, GNorm = 0.2693, lr_0 = 5.3965e-04
Loss = 1.5227e-02, PNorm = 132.9508, GNorm = 0.4235, lr_0 = 5.3928e-04
Loss = 1.3340e-02, PNorm = 132.9865, GNorm = 0.1723, lr_0 = 5.3891e-04
Loss = 1.5063e-02, PNorm = 133.0176, GNorm = 0.3019, lr_0 = 5.3854e-04
Loss = 1.2344e-02, PNorm = 133.0514, GNorm = 0.1969, lr_0 = 5.3817e-04
Loss = 1.4057e-02, PNorm = 133.0830, GNorm = 0.2788, lr_0 = 5.3781e-04
Loss = 1.6272e-02, PNorm = 133.1234, GNorm = 0.2121, lr_0 = 5.3744e-04
Loss = 1.2311e-02, PNorm = 133.1610, GNorm = 0.3078, lr_0 = 5.3707e-04
Loss = 1.8042e-02, PNorm = 133.1915, GNorm = 0.2126, lr_0 = 5.3670e-04
Loss = 1.2463e-02, PNorm = 133.2148, GNorm = 0.1656, lr_0 = 5.3633e-04
Loss = 1.7722e-02, PNorm = 133.2421, GNorm = 0.4960, lr_0 = 5.3597e-04
Loss = 1.4449e-02, PNorm = 133.2722, GNorm = 0.3930, lr_0 = 5.3560e-04
Loss = 1.8858e-02, PNorm = 133.3117, GNorm = 0.4988, lr_0 = 5.3523e-04
Loss = 1.4793e-02, PNorm = 133.3493, GNorm = 0.2808, lr_0 = 5.3486e-04
Loss = 1.4806e-02, PNorm = 133.3828, GNorm = 0.2172, lr_0 = 5.3450e-04
Loss = 1.4646e-02, PNorm = 133.4189, GNorm = 0.3086, lr_0 = 5.3413e-04
Loss = 1.5735e-02, PNorm = 133.4537, GNorm = 0.3840, lr_0 = 5.3377e-04
Loss = 1.9479e-02, PNorm = 133.4897, GNorm = 0.3018, lr_0 = 5.3340e-04
Loss = 1.2731e-02, PNorm = 133.5183, GNorm = 0.2430, lr_0 = 5.3304e-04
Loss = 1.6184e-02, PNorm = 133.5481, GNorm = 0.4376, lr_0 = 5.3267e-04
Loss = 1.6715e-02, PNorm = 133.5795, GNorm = 0.3398, lr_0 = 5.3231e-04
Loss = 1.5689e-02, PNorm = 133.6175, GNorm = 0.3150, lr_0 = 5.3194e-04
Loss = 1.6656e-02, PNorm = 133.6568, GNorm = 0.1594, lr_0 = 5.3158e-04
Loss = 1.6861e-02, PNorm = 133.6898, GNorm = 0.2013, lr_0 = 5.3121e-04
Loss = 1.3933e-02, PNorm = 133.7194, GNorm = 0.3964, lr_0 = 5.3085e-04
Loss = 1.7171e-02, PNorm = 133.7506, GNorm = 0.2964, lr_0 = 5.3048e-04
Loss = 1.6601e-02, PNorm = 133.7894, GNorm = 0.1974, lr_0 = 5.3012e-04
Loss = 1.6492e-02, PNorm = 133.8308, GNorm = 0.4422, lr_0 = 5.2976e-04
Loss = 1.6085e-02, PNorm = 133.8683, GNorm = 0.3875, lr_0 = 5.2939e-04
Loss = 1.4698e-02, PNorm = 133.9040, GNorm = 0.2905, lr_0 = 5.2903e-04
Loss = 1.4540e-02, PNorm = 133.9404, GNorm = 0.5162, lr_0 = 5.2867e-04
Loss = 1.2721e-02, PNorm = 133.9769, GNorm = 0.3367, lr_0 = 5.2831e-04
Loss = 1.7329e-02, PNorm = 134.0068, GNorm = 0.4480, lr_0 = 5.2795e-04
Loss = 1.6565e-02, PNorm = 134.0457, GNorm = 0.3603, lr_0 = 5.2758e-04
Loss = 1.3208e-02, PNorm = 134.0803, GNorm = 0.3268, lr_0 = 5.2722e-04
Loss = 2.0305e-02, PNorm = 134.1135, GNorm = 0.3835, lr_0 = 5.2686e-04
Loss = 1.2807e-02, PNorm = 134.1514, GNorm = 0.1650, lr_0 = 5.2650e-04
Loss = 1.3900e-02, PNorm = 134.1870, GNorm = 0.1661, lr_0 = 5.2614e-04
Loss = 1.2718e-02, PNorm = 134.2247, GNorm = 0.3072, lr_0 = 5.2578e-04
Loss = 1.7602e-02, PNorm = 134.2574, GNorm = 0.2042, lr_0 = 5.2542e-04
Loss = 1.4661e-02, PNorm = 134.2905, GNorm = 0.1835, lr_0 = 5.2506e-04
Loss = 1.4955e-02, PNorm = 134.3257, GNorm = 0.2647, lr_0 = 5.2470e-04
Loss = 1.7097e-02, PNorm = 134.3609, GNorm = 0.1346, lr_0 = 5.2434e-04
Loss = 1.5335e-02, PNorm = 134.3975, GNorm = 0.1748, lr_0 = 5.2398e-04
Loss = 1.4551e-02, PNorm = 134.4326, GNorm = 0.2418, lr_0 = 5.2362e-04
Loss = 1.4035e-02, PNorm = 134.4680, GNorm = 0.3091, lr_0 = 5.2326e-04
Loss = 1.3021e-02, PNorm = 134.4980, GNorm = 0.1291, lr_0 = 5.2290e-04
Loss = 1.7059e-02, PNorm = 134.5305, GNorm = 0.2044, lr_0 = 5.2255e-04
Loss = 1.3245e-02, PNorm = 134.5665, GNorm = 0.3425, lr_0 = 5.2219e-04
Loss = 1.4523e-02, PNorm = 134.6071, GNorm = 0.2243, lr_0 = 5.2183e-04
Loss = 2.0275e-02, PNorm = 134.6436, GNorm = 0.1656, lr_0 = 5.2147e-04
Loss = 1.2627e-02, PNorm = 134.6807, GNorm = 0.4863, lr_0 = 5.2112e-04
Loss = 1.6521e-02, PNorm = 134.7177, GNorm = 0.2645, lr_0 = 5.2076e-04
Loss = 1.7162e-02, PNorm = 134.7592, GNorm = 0.4243, lr_0 = 5.2040e-04
Loss = 1.7665e-02, PNorm = 134.8031, GNorm = 0.1704, lr_0 = 5.2005e-04
Loss = 1.6914e-02, PNorm = 134.8419, GNorm = 0.2482, lr_0 = 5.1969e-04
Loss = 1.7840e-02, PNorm = 134.8880, GNorm = 0.4885, lr_0 = 5.1933e-04
Loss = 1.5636e-02, PNorm = 134.9300, GNorm = 0.3949, lr_0 = 5.1898e-04
Loss = 1.5828e-02, PNorm = 134.9745, GNorm = 0.3594, lr_0 = 5.1862e-04
Loss = 1.4927e-02, PNorm = 135.0103, GNorm = 0.5422, lr_0 = 5.1827e-04
Loss = 1.5503e-02, PNorm = 135.0438, GNorm = 0.2656, lr_0 = 5.1791e-04
Validation mae = 0.486925
Epoch 10
Loss = 1.2722e-02, PNorm = 135.0750, GNorm = 0.2485, lr_0 = 5.1756e-04
Loss = 1.6551e-02, PNorm = 135.1016, GNorm = 0.4137, lr_0 = 5.1720e-04
Loss = 1.2471e-02, PNorm = 135.1279, GNorm = 0.2663, lr_0 = 5.1685e-04
Loss = 1.2563e-02, PNorm = 135.1480, GNorm = 0.6933, lr_0 = 5.1649e-04
Loss = 1.6942e-02, PNorm = 135.1716, GNorm = 0.1500, lr_0 = 5.1614e-04
Loss = 1.3073e-02, PNorm = 135.1965, GNorm = 0.3089, lr_0 = 5.1579e-04
Loss = 1.3036e-02, PNorm = 135.2234, GNorm = 0.3600, lr_0 = 5.1543e-04
Loss = 1.2709e-02, PNorm = 135.2508, GNorm = 0.1554, lr_0 = 5.1508e-04
Loss = 1.2004e-02, PNorm = 135.2736, GNorm = 0.4151, lr_0 = 5.1473e-04
Loss = 1.2157e-02, PNorm = 135.2944, GNorm = 0.2607, lr_0 = 5.1437e-04
Loss = 1.1460e-02, PNorm = 135.3136, GNorm = 0.5770, lr_0 = 5.1402e-04
Loss = 1.4474e-02, PNorm = 135.3362, GNorm = 0.1605, lr_0 = 5.1367e-04
Loss = 1.1753e-02, PNorm = 135.3587, GNorm = 0.3019, lr_0 = 5.1332e-04
Loss = 1.5122e-02, PNorm = 135.3753, GNorm = 0.1859, lr_0 = 5.1297e-04
Loss = 1.1387e-02, PNorm = 135.3974, GNorm = 0.1966, lr_0 = 5.1262e-04
Loss = 1.4788e-02, PNorm = 135.4221, GNorm = 0.3842, lr_0 = 5.1226e-04
Loss = 1.3385e-02, PNorm = 135.4497, GNorm = 0.2078, lr_0 = 5.1191e-04
Loss = 1.2564e-02, PNorm = 135.4730, GNorm = 0.4043, lr_0 = 5.1156e-04
Loss = 1.2470e-02, PNorm = 135.5002, GNorm = 0.1718, lr_0 = 5.1121e-04
Loss = 1.2455e-02, PNorm = 135.5212, GNorm = 0.1395, lr_0 = 5.1086e-04
Loss = 1.0629e-02, PNorm = 135.5446, GNorm = 0.1406, lr_0 = 5.1051e-04
Loss = 1.3246e-02, PNorm = 135.5683, GNorm = 0.3412, lr_0 = 5.1016e-04
Loss = 1.0631e-02, PNorm = 135.5946, GNorm = 0.1492, lr_0 = 5.0981e-04
Loss = 1.2065e-02, PNorm = 135.6169, GNorm = 0.4707, lr_0 = 5.0946e-04
Loss = 1.1277e-02, PNorm = 135.6393, GNorm = 0.1260, lr_0 = 5.0911e-04
Loss = 1.2318e-02, PNorm = 135.6681, GNorm = 0.2551, lr_0 = 5.0877e-04
Loss = 1.1753e-02, PNorm = 135.6924, GNorm = 0.2304, lr_0 = 5.0842e-04
Loss = 1.3290e-02, PNorm = 135.7214, GNorm = 0.3082, lr_0 = 5.0807e-04
Loss = 1.2300e-02, PNorm = 135.7496, GNorm = 0.1388, lr_0 = 5.0772e-04
Loss = 1.1571e-02, PNorm = 135.7708, GNorm = 0.1982, lr_0 = 5.0737e-04
Loss = 1.1997e-02, PNorm = 135.7973, GNorm = 0.2595, lr_0 = 5.0703e-04
Loss = 1.2642e-02, PNorm = 135.8223, GNorm = 0.1709, lr_0 = 5.0668e-04
Loss = 1.3982e-02, PNorm = 135.8474, GNorm = 0.4305, lr_0 = 5.0633e-04
Loss = 1.0535e-02, PNorm = 135.8732, GNorm = 0.1924, lr_0 = 5.0598e-04
Loss = 1.2027e-02, PNorm = 135.8961, GNorm = 0.2807, lr_0 = 5.0564e-04
Loss = 1.1226e-02, PNorm = 135.9220, GNorm = 0.2642, lr_0 = 5.0529e-04
Loss = 1.5144e-02, PNorm = 135.9488, GNorm = 0.4079, lr_0 = 5.0494e-04
Loss = 1.2530e-02, PNorm = 135.9826, GNorm = 0.3106, lr_0 = 5.0460e-04
Loss = 1.2236e-02, PNorm = 136.0136, GNorm = 0.3021, lr_0 = 5.0425e-04
Loss = 1.2907e-02, PNorm = 136.0371, GNorm = 0.1907, lr_0 = 5.0391e-04
Loss = 1.3320e-02, PNorm = 136.0653, GNorm = 0.3679, lr_0 = 5.0356e-04
Loss = 1.0104e-02, PNorm = 136.0886, GNorm = 0.4507, lr_0 = 5.0322e-04
Loss = 1.1787e-02, PNorm = 136.1101, GNorm = 0.3234, lr_0 = 5.0287e-04
Loss = 1.1814e-02, PNorm = 136.1355, GNorm = 0.3281, lr_0 = 5.0253e-04
Loss = 1.4096e-02, PNorm = 136.1586, GNorm = 0.3821, lr_0 = 5.0218e-04
Loss = 1.1684e-02, PNorm = 136.1892, GNorm = 0.4539, lr_0 = 5.0184e-04
Loss = 1.3928e-02, PNorm = 136.2216, GNorm = 0.2420, lr_0 = 5.0150e-04
Loss = 1.2729e-02, PNorm = 136.2493, GNorm = 0.1664, lr_0 = 5.0115e-04
Loss = 1.3893e-02, PNorm = 136.2737, GNorm = 0.7242, lr_0 = 5.0081e-04
Loss = 1.4124e-02, PNorm = 136.2998, GNorm = 0.4949, lr_0 = 5.0047e-04
Loss = 1.2662e-02, PNorm = 136.3311, GNorm = 0.2424, lr_0 = 5.0012e-04
Loss = 1.0296e-02, PNorm = 136.3614, GNorm = 0.3603, lr_0 = 4.9978e-04
Loss = 1.4470e-02, PNorm = 136.3878, GNorm = 0.6954, lr_0 = 4.9944e-04
Loss = 1.1123e-02, PNorm = 136.4122, GNorm = 0.2080, lr_0 = 4.9910e-04
Loss = 1.2894e-02, PNorm = 136.4370, GNorm = 0.5218, lr_0 = 4.9875e-04
Loss = 1.2014e-02, PNorm = 136.4676, GNorm = 0.1996, lr_0 = 4.9841e-04
Loss = 1.2157e-02, PNorm = 136.4934, GNorm = 0.1221, lr_0 = 4.9807e-04
Loss = 1.0158e-02, PNorm = 136.5234, GNorm = 0.3774, lr_0 = 4.9773e-04
Loss = 1.0434e-02, PNorm = 136.5496, GNorm = 0.3088, lr_0 = 4.9739e-04
Loss = 1.1544e-02, PNorm = 136.5761, GNorm = 0.5601, lr_0 = 4.9705e-04
Loss = 1.1627e-02, PNorm = 136.6032, GNorm = 0.4347, lr_0 = 4.9671e-04
Loss = 1.3447e-02, PNorm = 136.6303, GNorm = 0.5176, lr_0 = 4.9637e-04
Loss = 1.1779e-02, PNorm = 136.6604, GNorm = 0.2173, lr_0 = 4.9603e-04
Loss = 1.5895e-02, PNorm = 136.6880, GNorm = 0.4045, lr_0 = 4.9569e-04
Loss = 1.1896e-02, PNorm = 136.7198, GNorm = 0.2539, lr_0 = 4.9535e-04
Loss = 1.2224e-02, PNorm = 136.7459, GNorm = 0.7037, lr_0 = 4.9501e-04
Loss = 1.4679e-02, PNorm = 136.7745, GNorm = 0.1572, lr_0 = 4.9467e-04
Loss = 1.6750e-02, PNorm = 136.7970, GNorm = 0.5162, lr_0 = 4.9433e-04
Loss = 1.3994e-02, PNorm = 136.8254, GNorm = 0.3930, lr_0 = 4.9399e-04
Loss = 1.7893e-02, PNorm = 136.8508, GNorm = 0.4272, lr_0 = 4.9365e-04
Loss = 1.2037e-02, PNorm = 136.8806, GNorm = 0.3430, lr_0 = 4.9332e-04
Loss = 1.3192e-02, PNorm = 136.9098, GNorm = 0.2674, lr_0 = 4.9298e-04
Loss = 1.0560e-02, PNorm = 136.9394, GNorm = 0.1812, lr_0 = 4.9264e-04
Loss = 1.1439e-02, PNorm = 136.9666, GNorm = 0.2915, lr_0 = 4.9230e-04
Loss = 1.3186e-02, PNorm = 136.9912, GNorm = 0.3987, lr_0 = 4.9197e-04
Loss = 1.7418e-02, PNorm = 137.0188, GNorm = 0.2140, lr_0 = 4.9163e-04
Loss = 1.0491e-02, PNorm = 137.0527, GNorm = 0.2251, lr_0 = 4.9129e-04
Loss = 1.1323e-02, PNorm = 137.0812, GNorm = 0.1527, lr_0 = 4.9095e-04
Loss = 1.2427e-02, PNorm = 137.1094, GNorm = 0.1682, lr_0 = 4.9062e-04
Loss = 1.3880e-02, PNorm = 137.1355, GNorm = 0.5045, lr_0 = 4.9028e-04
Loss = 1.1315e-02, PNorm = 137.1627, GNorm = 0.3220, lr_0 = 4.8995e-04
Loss = 1.2433e-02, PNorm = 137.1861, GNorm = 0.2489, lr_0 = 4.8961e-04
Loss = 1.1283e-02, PNorm = 137.2106, GNorm = 0.1604, lr_0 = 4.8928e-04
Loss = 1.0907e-02, PNorm = 137.2382, GNorm = 0.3836, lr_0 = 4.8894e-04
Loss = 1.3002e-02, PNorm = 137.2666, GNorm = 0.8039, lr_0 = 4.8861e-04
Loss = 1.1525e-02, PNorm = 137.2946, GNorm = 0.1844, lr_0 = 4.8827e-04
Loss = 1.2232e-02, PNorm = 137.3280, GNorm = 0.2710, lr_0 = 4.8794e-04
Loss = 1.2885e-02, PNorm = 137.3575, GNorm = 0.5361, lr_0 = 4.8760e-04
Loss = 1.6335e-02, PNorm = 137.3844, GNorm = 0.2869, lr_0 = 4.8727e-04
Loss = 1.1204e-02, PNorm = 137.4110, GNorm = 0.3171, lr_0 = 4.8693e-04
Loss = 1.0388e-02, PNorm = 137.4388, GNorm = 0.3786, lr_0 = 4.8660e-04
Loss = 1.0306e-02, PNorm = 137.4654, GNorm = 0.2112, lr_0 = 4.8627e-04
Loss = 1.3121e-02, PNorm = 137.4957, GNorm = 0.2741, lr_0 = 4.8593e-04
Loss = 1.0414e-02, PNorm = 137.5263, GNorm = 0.2653, lr_0 = 4.8560e-04
Loss = 1.3264e-02, PNorm = 137.5566, GNorm = 0.2143, lr_0 = 4.8527e-04
Loss = 1.1027e-02, PNorm = 137.5868, GNorm = 0.1751, lr_0 = 4.8494e-04
Loss = 1.1621e-02, PNorm = 137.6141, GNorm = 0.2505, lr_0 = 4.8460e-04
Loss = 1.0464e-02, PNorm = 137.6392, GNorm = 0.1744, lr_0 = 4.8427e-04
Loss = 1.0832e-02, PNorm = 137.6670, GNorm = 0.3871, lr_0 = 4.8394e-04
Loss = 1.2638e-02, PNorm = 137.6976, GNorm = 0.2845, lr_0 = 4.8361e-04
Loss = 1.1745e-02, PNorm = 137.7236, GNorm = 0.1881, lr_0 = 4.8328e-04
Loss = 1.6713e-02, PNorm = 137.7509, GNorm = 0.7525, lr_0 = 4.8295e-04
Loss = 1.4203e-02, PNorm = 137.7773, GNorm = 0.1819, lr_0 = 4.8262e-04
Loss = 1.3715e-02, PNorm = 137.8118, GNorm = 0.3430, lr_0 = 4.8228e-04
Loss = 1.2730e-02, PNorm = 137.8427, GNorm = 0.3925, lr_0 = 4.8195e-04
Loss = 9.9170e-03, PNorm = 137.8773, GNorm = 0.2540, lr_0 = 4.8162e-04
Loss = 1.2406e-02, PNorm = 137.9014, GNorm = 0.1775, lr_0 = 4.8129e-04
Loss = 1.2964e-02, PNorm = 137.9281, GNorm = 0.2379, lr_0 = 4.8096e-04
Loss = 1.4002e-02, PNorm = 137.9623, GNorm = 0.3060, lr_0 = 4.8064e-04
Loss = 1.0851e-02, PNorm = 137.9948, GNorm = 0.3224, lr_0 = 4.8031e-04
Loss = 1.2936e-02, PNorm = 138.0311, GNorm = 0.4123, lr_0 = 4.7998e-04
Loss = 1.0289e-02, PNorm = 138.0670, GNorm = 0.2569, lr_0 = 4.7965e-04
Loss = 1.5124e-02, PNorm = 138.0982, GNorm = 0.7096, lr_0 = 4.7932e-04
Loss = 1.2911e-02, PNorm = 138.1217, GNorm = 0.5329, lr_0 = 4.7899e-04
Loss = 1.1827e-02, PNorm = 138.1507, GNorm = 0.1919, lr_0 = 4.7866e-04
Loss = 1.1825e-02, PNorm = 138.1743, GNorm = 0.8056, lr_0 = 4.7833e-04
Loss = 1.1825e-02, PNorm = 138.2067, GNorm = 0.4619, lr_0 = 4.7801e-04
Loss = 1.1879e-02, PNorm = 138.2354, GNorm = 0.1730, lr_0 = 4.7768e-04
Loss = 1.2836e-02, PNorm = 138.2625, GNorm = 0.2656, lr_0 = 4.7735e-04
Loss = 1.3442e-02, PNorm = 138.2932, GNorm = 0.4121, lr_0 = 4.7703e-04
Validation mae = 0.485003
Epoch 11
Loss = 1.2271e-02, PNorm = 138.3175, GNorm = 0.5720, lr_0 = 4.7670e-04
Loss = 1.4792e-02, PNorm = 138.3376, GNorm = 0.2586, lr_0 = 4.7637e-04
Loss = 1.0808e-02, PNorm = 138.3555, GNorm = 0.2168, lr_0 = 4.7605e-04
Loss = 1.0967e-02, PNorm = 138.3786, GNorm = 0.5398, lr_0 = 4.7572e-04
Loss = 1.0474e-02, PNorm = 138.4001, GNorm = 0.1255, lr_0 = 4.7539e-04
Loss = 1.0905e-02, PNorm = 138.4266, GNorm = 0.3151, lr_0 = 4.7507e-04
Loss = 1.2692e-02, PNorm = 138.4535, GNorm = 0.1627, lr_0 = 4.7474e-04
Loss = 1.2461e-02, PNorm = 138.4777, GNorm = 0.3764, lr_0 = 4.7442e-04
Loss = 1.0909e-02, PNorm = 138.4982, GNorm = 0.3354, lr_0 = 4.7409e-04
Loss = 9.6879e-03, PNorm = 138.5238, GNorm = 0.5457, lr_0 = 4.7377e-04
Loss = 9.8285e-03, PNorm = 138.5442, GNorm = 0.2338, lr_0 = 4.7344e-04
Loss = 1.1359e-02, PNorm = 138.5630, GNorm = 0.1332, lr_0 = 4.7312e-04
Loss = 1.0082e-02, PNorm = 138.5839, GNorm = 0.1599, lr_0 = 4.7279e-04
Loss = 1.1216e-02, PNorm = 138.6021, GNorm = 0.7668, lr_0 = 4.7247e-04
Loss = 1.0438e-02, PNorm = 138.6187, GNorm = 0.4336, lr_0 = 4.7215e-04
Loss = 1.0038e-02, PNorm = 138.6432, GNorm = 0.2087, lr_0 = 4.7182e-04
Loss = 8.5958e-03, PNorm = 138.6688, GNorm = 0.1249, lr_0 = 4.7150e-04
Loss = 9.2285e-03, PNorm = 138.6890, GNorm = 0.1428, lr_0 = 4.7118e-04
Loss = 9.6721e-03, PNorm = 138.7069, GNorm = 0.1533, lr_0 = 4.7085e-04
Loss = 1.2569e-02, PNorm = 138.7266, GNorm = 0.2702, lr_0 = 4.7053e-04
Loss = 9.9618e-03, PNorm = 138.7471, GNorm = 0.2590, lr_0 = 4.7021e-04
Loss = 1.1387e-02, PNorm = 138.7661, GNorm = 0.5974, lr_0 = 4.6989e-04
Loss = 1.2745e-02, PNorm = 138.7918, GNorm = 0.2243, lr_0 = 4.6957e-04
Loss = 9.9283e-03, PNorm = 138.8177, GNorm = 0.4207, lr_0 = 4.6924e-04
Loss = 1.4224e-02, PNorm = 138.8387, GNorm = 0.3498, lr_0 = 4.6892e-04
Loss = 9.1210e-03, PNorm = 138.8579, GNorm = 0.2226, lr_0 = 4.6860e-04
Loss = 1.0228e-02, PNorm = 138.8811, GNorm = 0.2110, lr_0 = 4.6828e-04
Loss = 1.0765e-02, PNorm = 138.9013, GNorm = 0.1484, lr_0 = 4.6796e-04
Loss = 1.1791e-02, PNorm = 138.9235, GNorm = 0.4339, lr_0 = 4.6764e-04
Loss = 1.0336e-02, PNorm = 138.9571, GNorm = 0.1550, lr_0 = 4.6732e-04
Loss = 9.7560e-03, PNorm = 138.9862, GNorm = 0.1026, lr_0 = 4.6700e-04
Loss = 9.8684e-03, PNorm = 139.0071, GNorm = 0.4642, lr_0 = 4.6668e-04
Loss = 9.3217e-03, PNorm = 139.0308, GNorm = 0.2637, lr_0 = 4.6636e-04
Loss = 8.1262e-03, PNorm = 139.0529, GNorm = 0.1096, lr_0 = 4.6604e-04
Loss = 1.0347e-02, PNorm = 139.0754, GNorm = 0.1171, lr_0 = 4.6572e-04
Loss = 1.0143e-02, PNorm = 139.0977, GNorm = 0.1455, lr_0 = 4.6540e-04
Loss = 1.0351e-02, PNorm = 139.1207, GNorm = 0.5671, lr_0 = 4.6508e-04
Loss = 1.1539e-02, PNorm = 139.1368, GNorm = 0.5230, lr_0 = 4.6476e-04
Loss = 1.0501e-02, PNorm = 139.1597, GNorm = 0.9411, lr_0 = 4.6445e-04
Loss = 9.2820e-03, PNorm = 139.1845, GNorm = 0.1355, lr_0 = 4.6413e-04
Loss = 9.4501e-03, PNorm = 139.2053, GNorm = 0.2573, lr_0 = 4.6381e-04
Loss = 9.4173e-03, PNorm = 139.2321, GNorm = 0.5244, lr_0 = 4.6349e-04
Loss = 9.0992e-03, PNorm = 139.2530, GNorm = 0.2702, lr_0 = 4.6317e-04
Loss = 1.0659e-02, PNorm = 139.2694, GNorm = 0.1633, lr_0 = 4.6286e-04
Loss = 9.4864e-03, PNorm = 139.2883, GNorm = 0.1647, lr_0 = 4.6254e-04
Loss = 8.6423e-03, PNorm = 139.3061, GNorm = 0.1510, lr_0 = 4.6222e-04
Loss = 9.4094e-03, PNorm = 139.3265, GNorm = 0.1907, lr_0 = 4.6191e-04
Loss = 1.0319e-02, PNorm = 139.3454, GNorm = 0.2416, lr_0 = 4.6159e-04
Loss = 1.0465e-02, PNorm = 139.3683, GNorm = 0.2795, lr_0 = 4.6127e-04
Loss = 1.1922e-02, PNorm = 139.3963, GNorm = 0.2004, lr_0 = 4.6096e-04
Loss = 9.3980e-03, PNorm = 139.4225, GNorm = 0.2834, lr_0 = 4.6064e-04
Loss = 9.4414e-03, PNorm = 139.4458, GNorm = 0.4270, lr_0 = 4.6033e-04
Loss = 1.0653e-02, PNorm = 139.4702, GNorm = 0.2522, lr_0 = 4.6001e-04
Loss = 1.0138e-02, PNorm = 139.4974, GNorm = 0.1765, lr_0 = 4.5970e-04
Loss = 9.9560e-03, PNorm = 139.5193, GNorm = 0.1942, lr_0 = 4.5938e-04
Loss = 1.1855e-02, PNorm = 139.5405, GNorm = 0.2955, lr_0 = 4.5907e-04
Loss = 1.1243e-02, PNorm = 139.5692, GNorm = 0.2028, lr_0 = 4.5875e-04
Loss = 1.1054e-02, PNorm = 139.5950, GNorm = 0.2349, lr_0 = 4.5844e-04
Loss = 8.3140e-03, PNorm = 139.6176, GNorm = 0.1376, lr_0 = 4.5812e-04
Loss = 1.1227e-02, PNorm = 139.6394, GNorm = 0.3291, lr_0 = 4.5781e-04
Loss = 1.1864e-02, PNorm = 139.6678, GNorm = 0.8162, lr_0 = 4.5750e-04
Loss = 1.0920e-02, PNorm = 139.6883, GNorm = 0.4063, lr_0 = 4.5718e-04
Loss = 1.0279e-02, PNorm = 139.7098, GNorm = 0.3192, lr_0 = 4.5687e-04
Loss = 1.0696e-02, PNorm = 139.7343, GNorm = 0.1785, lr_0 = 4.5656e-04
Loss = 8.9329e-03, PNorm = 139.7568, GNorm = 0.3515, lr_0 = 4.5624e-04
Loss = 1.0528e-02, PNorm = 139.7830, GNorm = 0.2830, lr_0 = 4.5593e-04
Loss = 1.3604e-02, PNorm = 139.8118, GNorm = 0.3783, lr_0 = 4.5562e-04
Loss = 1.1306e-02, PNorm = 139.8338, GNorm = 0.7145, lr_0 = 4.5531e-04
Loss = 9.4164e-03, PNorm = 139.8533, GNorm = 0.4205, lr_0 = 4.5499e-04
Loss = 1.0199e-02, PNorm = 139.8795, GNorm = 0.1392, lr_0 = 4.5468e-04
Loss = 9.5412e-03, PNorm = 139.9094, GNorm = 0.2315, lr_0 = 4.5437e-04
Loss = 9.5592e-03, PNorm = 139.9330, GNorm = 0.6386, lr_0 = 4.5406e-04
Loss = 9.5029e-03, PNorm = 139.9567, GNorm = 0.3075, lr_0 = 4.5375e-04
Loss = 1.1073e-02, PNorm = 139.9816, GNorm = 0.8137, lr_0 = 4.5344e-04
Loss = 9.9676e-03, PNorm = 140.0059, GNorm = 0.4890, lr_0 = 4.5313e-04
Loss = 1.1235e-02, PNorm = 140.0323, GNorm = 0.2201, lr_0 = 4.5282e-04
Loss = 1.0170e-02, PNorm = 140.0569, GNorm = 0.2855, lr_0 = 4.5251e-04
Loss = 1.0391e-02, PNorm = 140.0821, GNorm = 0.5630, lr_0 = 4.5220e-04
Loss = 9.0569e-03, PNorm = 140.1085, GNorm = 0.2308, lr_0 = 4.5189e-04
Loss = 1.1139e-02, PNorm = 140.1338, GNorm = 0.1849, lr_0 = 4.5158e-04
Loss = 1.2665e-02, PNorm = 140.1591, GNorm = 0.2022, lr_0 = 4.5127e-04
Loss = 1.0235e-02, PNorm = 140.1848, GNorm = 0.3469, lr_0 = 4.5096e-04
Loss = 1.0332e-02, PNorm = 140.2088, GNorm = 0.5342, lr_0 = 4.5065e-04
Loss = 1.2302e-02, PNorm = 140.2390, GNorm = 0.6608, lr_0 = 4.5034e-04
Loss = 1.0202e-02, PNorm = 140.2649, GNorm = 0.4887, lr_0 = 4.5003e-04
Loss = 1.0528e-02, PNorm = 140.2861, GNorm = 0.1953, lr_0 = 4.4972e-04
Loss = 1.1600e-02, PNorm = 140.3122, GNorm = 0.1021, lr_0 = 4.4942e-04
Loss = 9.4345e-03, PNorm = 140.3419, GNorm = 0.3196, lr_0 = 4.4911e-04
Loss = 1.1166e-02, PNorm = 140.3686, GNorm = 0.1679, lr_0 = 4.4880e-04
Loss = 1.1414e-02, PNorm = 140.3956, GNorm = 0.2976, lr_0 = 4.4849e-04
Loss = 8.5618e-03, PNorm = 140.4147, GNorm = 0.1331, lr_0 = 4.4819e-04
Loss = 1.0352e-02, PNorm = 140.4395, GNorm = 0.2200, lr_0 = 4.4788e-04
Loss = 9.0439e-03, PNorm = 140.4635, GNorm = 0.3724, lr_0 = 4.4757e-04
Loss = 1.0016e-02, PNorm = 140.4859, GNorm = 0.3608, lr_0 = 4.4727e-04
Loss = 1.0023e-02, PNorm = 140.5110, GNorm = 0.1352, lr_0 = 4.4696e-04
Loss = 1.6352e-02, PNorm = 140.5343, GNorm = 0.1951, lr_0 = 4.4665e-04
Loss = 1.0033e-02, PNorm = 140.5508, GNorm = 0.1236, lr_0 = 4.4635e-04
Loss = 1.0360e-02, PNorm = 140.5679, GNorm = 0.1655, lr_0 = 4.4604e-04
Loss = 1.1563e-02, PNorm = 140.5897, GNorm = 0.4139, lr_0 = 4.4574e-04
Loss = 1.1894e-02, PNorm = 140.6138, GNorm = 0.1701, lr_0 = 4.4543e-04
Loss = 9.6267e-03, PNorm = 140.6410, GNorm = 0.3979, lr_0 = 4.4513e-04
Loss = 1.0210e-02, PNorm = 140.6728, GNorm = 0.1969, lr_0 = 4.4482e-04
Loss = 9.0194e-03, PNorm = 140.6990, GNorm = 0.2379, lr_0 = 4.4452e-04
Loss = 1.0531e-02, PNorm = 140.7240, GNorm = 0.2661, lr_0 = 4.4421e-04
Loss = 1.0465e-02, PNorm = 140.7462, GNorm = 0.1976, lr_0 = 4.4391e-04
Loss = 1.2135e-02, PNorm = 140.7702, GNorm = 0.3882, lr_0 = 4.4360e-04
Loss = 1.0451e-02, PNorm = 140.8038, GNorm = 0.1820, lr_0 = 4.4330e-04
Loss = 1.0206e-02, PNorm = 140.8386, GNorm = 0.2630, lr_0 = 4.4299e-04
Loss = 9.5015e-03, PNorm = 140.8679, GNorm = 0.2629, lr_0 = 4.4269e-04
Loss = 1.2748e-02, PNorm = 140.8917, GNorm = 0.2804, lr_0 = 4.4239e-04
Loss = 1.0103e-02, PNorm = 140.9130, GNorm = 0.1006, lr_0 = 4.4209e-04
Loss = 9.2941e-03, PNorm = 140.9344, GNorm = 0.1764, lr_0 = 4.4178e-04
Loss = 1.0507e-02, PNorm = 140.9540, GNorm = 0.2783, lr_0 = 4.4148e-04
Loss = 9.1242e-03, PNorm = 140.9770, GNorm = 0.1724, lr_0 = 4.4118e-04
Loss = 9.1851e-03, PNorm = 140.9986, GNorm = 0.4554, lr_0 = 4.4088e-04
Loss = 1.1525e-02, PNorm = 141.0225, GNorm = 0.3957, lr_0 = 4.4057e-04
Loss = 8.8586e-03, PNorm = 141.0448, GNorm = 0.2123, lr_0 = 4.4027e-04
Loss = 9.8824e-03, PNorm = 141.0663, GNorm = 0.4019, lr_0 = 4.3997e-04
Loss = 1.0963e-02, PNorm = 141.0895, GNorm = 1.0751, lr_0 = 4.3967e-04
Loss = 1.1104e-02, PNorm = 141.1206, GNorm = 0.4545, lr_0 = 4.3937e-04
Validation mae = 0.480638
Epoch 12
Loss = 8.4163e-03, PNorm = 141.1407, GNorm = 0.1708, lr_0 = 4.3907e-04
Loss = 8.9387e-03, PNorm = 141.1579, GNorm = 0.1245, lr_0 = 4.3877e-04
Loss = 1.2276e-02, PNorm = 141.1757, GNorm = 0.6171, lr_0 = 4.3846e-04
Loss = 8.8625e-03, PNorm = 141.1915, GNorm = 0.1625, lr_0 = 4.3816e-04
Loss = 9.3426e-03, PNorm = 141.2084, GNorm = 0.3751, lr_0 = 4.3786e-04
Loss = 7.0998e-03, PNorm = 141.2299, GNorm = 0.5249, lr_0 = 4.3756e-04
Loss = 8.4683e-03, PNorm = 141.2444, GNorm = 0.4144, lr_0 = 4.3726e-04
Loss = 9.7256e-03, PNorm = 141.2612, GNorm = 0.2155, lr_0 = 4.3696e-04
Loss = 8.1504e-03, PNorm = 141.2789, GNorm = 0.2751, lr_0 = 4.3667e-04
Loss = 7.9172e-03, PNorm = 141.2992, GNorm = 0.1613, lr_0 = 4.3637e-04
Loss = 8.9764e-03, PNorm = 141.3166, GNorm = 0.1951, lr_0 = 4.3607e-04
Loss = 8.8939e-03, PNorm = 141.3306, GNorm = 0.0856, lr_0 = 4.3577e-04
Loss = 7.9480e-03, PNorm = 141.3459, GNorm = 0.1732, lr_0 = 4.3547e-04
Loss = 8.5385e-03, PNorm = 141.3619, GNorm = 0.1443, lr_0 = 4.3517e-04
Loss = 1.0797e-02, PNorm = 141.3766, GNorm = 0.2792, lr_0 = 4.3487e-04
Loss = 9.1788e-03, PNorm = 141.3928, GNorm = 0.4116, lr_0 = 4.3458e-04
Loss = 8.4983e-03, PNorm = 141.4109, GNorm = 0.1999, lr_0 = 4.3428e-04
Loss = 1.1878e-02, PNorm = 141.4300, GNorm = 0.2061, lr_0 = 4.3398e-04
Loss = 9.9882e-03, PNorm = 141.4523, GNorm = 0.6855, lr_0 = 4.3368e-04
Loss = 8.1201e-03, PNorm = 141.4684, GNorm = 0.3489, lr_0 = 4.3339e-04
Loss = 9.5079e-03, PNorm = 141.4863, GNorm = 0.1505, lr_0 = 4.3309e-04
Loss = 9.0081e-03, PNorm = 141.5021, GNorm = 0.3748, lr_0 = 4.3279e-04
Loss = 9.6647e-03, PNorm = 141.5208, GNorm = 0.1723, lr_0 = 4.3250e-04
Loss = 8.3409e-03, PNorm = 141.5417, GNorm = 0.3561, lr_0 = 4.3220e-04
Loss = 8.0190e-03, PNorm = 141.5614, GNorm = 0.2906, lr_0 = 4.3190e-04
Loss = 7.3268e-03, PNorm = 141.5833, GNorm = 0.1901, lr_0 = 4.3161e-04
Loss = 7.1049e-03, PNorm = 141.5996, GNorm = 0.1966, lr_0 = 4.3131e-04
Loss = 7.8937e-03, PNorm = 141.6103, GNorm = 0.1425, lr_0 = 4.3102e-04
Loss = 9.7284e-03, PNorm = 141.6274, GNorm = 0.2853, lr_0 = 4.3072e-04
Loss = 8.0226e-03, PNorm = 141.6499, GNorm = 0.4050, lr_0 = 4.3043e-04
Loss = 7.3346e-03, PNorm = 141.6720, GNorm = 0.3655, lr_0 = 4.3013e-04
Loss = 1.1006e-02, PNorm = 141.6931, GNorm = 0.4164, lr_0 = 4.2984e-04
Loss = 9.4643e-03, PNorm = 141.7119, GNorm = 0.2799, lr_0 = 4.2954e-04
Loss = 9.5087e-03, PNorm = 141.7263, GNorm = 0.1454, lr_0 = 4.2925e-04
Loss = 7.1450e-03, PNorm = 141.7426, GNorm = 0.4749, lr_0 = 4.2895e-04
Loss = 7.8630e-03, PNorm = 141.7615, GNorm = 0.2044, lr_0 = 4.2866e-04
Loss = 8.3733e-03, PNorm = 141.7762, GNorm = 0.2479, lr_0 = 4.2837e-04
Loss = 7.9256e-03, PNorm = 141.7932, GNorm = 0.1751, lr_0 = 4.2807e-04
Loss = 8.1080e-03, PNorm = 141.8112, GNorm = 0.3235, lr_0 = 4.2778e-04
Loss = 9.0954e-03, PNorm = 141.8281, GNorm = 0.2299, lr_0 = 4.2749e-04
Loss = 7.6205e-03, PNorm = 141.8471, GNorm = 0.3714, lr_0 = 4.2719e-04
Loss = 9.2522e-03, PNorm = 141.8636, GNorm = 0.1640, lr_0 = 4.2690e-04
Loss = 7.3809e-03, PNorm = 141.8821, GNorm = 0.1712, lr_0 = 4.2661e-04
Loss = 8.2864e-03, PNorm = 141.9002, GNorm = 0.1373, lr_0 = 4.2632e-04
Loss = 7.6680e-03, PNorm = 141.9185, GNorm = 0.0858, lr_0 = 4.2602e-04
Loss = 1.0170e-02, PNorm = 141.9372, GNorm = 0.1426, lr_0 = 4.2573e-04
Loss = 7.4397e-03, PNorm = 141.9604, GNorm = 0.1071, lr_0 = 4.2544e-04
Loss = 8.8389e-03, PNorm = 141.9833, GNorm = 0.1905, lr_0 = 4.2515e-04
Loss = 7.7462e-03, PNorm = 142.0093, GNorm = 0.1719, lr_0 = 4.2486e-04
Loss = 8.6146e-03, PNorm = 142.0310, GNorm = 0.1568, lr_0 = 4.2457e-04
Loss = 8.0534e-03, PNorm = 142.0522, GNorm = 0.2767, lr_0 = 4.2428e-04
Loss = 6.8109e-03, PNorm = 142.0732, GNorm = 0.1704, lr_0 = 4.2399e-04
Loss = 1.0688e-02, PNorm = 142.0978, GNorm = 0.2489, lr_0 = 4.2370e-04
Loss = 7.4499e-03, PNorm = 142.1180, GNorm = 0.2061, lr_0 = 4.2340e-04
Loss = 9.2533e-03, PNorm = 142.1331, GNorm = 0.1685, lr_0 = 4.2311e-04
Loss = 7.7262e-03, PNorm = 142.1524, GNorm = 0.1702, lr_0 = 4.2283e-04
Loss = 8.0249e-03, PNorm = 142.1729, GNorm = 0.3071, lr_0 = 4.2254e-04
Loss = 8.0821e-03, PNorm = 142.1895, GNorm = 0.2791, lr_0 = 4.2225e-04
Loss = 8.4861e-03, PNorm = 142.2050, GNorm = 0.3329, lr_0 = 4.2196e-04
Loss = 6.7681e-03, PNorm = 142.2164, GNorm = 0.1055, lr_0 = 4.2167e-04
Loss = 7.9774e-03, PNorm = 142.2331, GNorm = 0.1020, lr_0 = 4.2138e-04
Loss = 1.2298e-02, PNorm = 142.2549, GNorm = 0.8666, lr_0 = 4.2109e-04
Loss = 7.7274e-03, PNorm = 142.2784, GNorm = 0.1562, lr_0 = 4.2080e-04
Loss = 8.3510e-03, PNorm = 142.2983, GNorm = 0.1421, lr_0 = 4.2051e-04
Loss = 7.3958e-03, PNorm = 142.3137, GNorm = 0.1290, lr_0 = 4.2023e-04
Loss = 7.9003e-03, PNorm = 142.3321, GNorm = 0.2407, lr_0 = 4.1994e-04
Loss = 9.5261e-03, PNorm = 142.3524, GNorm = 0.2306, lr_0 = 4.1965e-04
Loss = 8.8402e-03, PNorm = 142.3722, GNorm = 0.2650, lr_0 = 4.1936e-04
Loss = 1.0144e-02, PNorm = 142.3962, GNorm = 0.1631, lr_0 = 4.1907e-04
Loss = 7.5034e-03, PNorm = 142.4181, GNorm = 0.2387, lr_0 = 4.1879e-04
Loss = 7.5561e-03, PNorm = 142.4422, GNorm = 0.1422, lr_0 = 4.1850e-04
Loss = 7.0467e-03, PNorm = 142.4649, GNorm = 0.1437, lr_0 = 4.1821e-04
Loss = 7.9858e-03, PNorm = 142.4858, GNorm = 0.1214, lr_0 = 4.1793e-04
Loss = 9.4094e-03, PNorm = 142.5046, GNorm = 0.5265, lr_0 = 4.1764e-04
Loss = 9.4718e-03, PNorm = 142.5266, GNorm = 0.4566, lr_0 = 4.1736e-04
Loss = 7.2651e-03, PNorm = 142.5461, GNorm = 0.4514, lr_0 = 4.1707e-04
Loss = 1.1190e-02, PNorm = 142.5630, GNorm = 0.2079, lr_0 = 4.1678e-04
Loss = 7.4300e-03, PNorm = 142.5815, GNorm = 0.4318, lr_0 = 4.1650e-04
Loss = 9.5746e-03, PNorm = 142.6012, GNorm = 0.3002, lr_0 = 4.1621e-04
Loss = 7.3224e-03, PNorm = 142.6220, GNorm = 0.1590, lr_0 = 4.1593e-04
Loss = 8.4114e-03, PNorm = 142.6445, GNorm = 0.3976, lr_0 = 4.1564e-04
Loss = 8.4647e-03, PNorm = 142.6709, GNorm = 0.2585, lr_0 = 4.1536e-04
Loss = 8.7441e-03, PNorm = 142.6906, GNorm = 0.0987, lr_0 = 4.1507e-04
Loss = 1.2277e-02, PNorm = 142.7068, GNorm = 0.6088, lr_0 = 4.1479e-04
Loss = 1.0110e-02, PNorm = 142.7346, GNorm = 0.4268, lr_0 = 4.1450e-04
Loss = 1.4022e-02, PNorm = 142.7575, GNorm = 0.4034, lr_0 = 4.1422e-04
Loss = 8.4370e-03, PNorm = 142.7793, GNorm = 0.1659, lr_0 = 4.1394e-04
Loss = 6.6839e-03, PNorm = 142.7978, GNorm = 0.1738, lr_0 = 4.1365e-04
Loss = 1.0447e-02, PNorm = 142.8203, GNorm = 0.2045, lr_0 = 4.1337e-04
Loss = 7.5414e-03, PNorm = 142.8361, GNorm = 0.3305, lr_0 = 4.1309e-04
Loss = 8.6170e-03, PNorm = 142.8573, GNorm = 0.2661, lr_0 = 4.1280e-04
Loss = 7.8521e-03, PNorm = 142.8780, GNorm = 0.1555, lr_0 = 4.1252e-04
Loss = 1.0410e-02, PNorm = 142.9021, GNorm = 0.2405, lr_0 = 4.1224e-04
Loss = 8.9993e-03, PNorm = 142.9238, GNorm = 0.2248, lr_0 = 4.1196e-04
Loss = 1.1310e-02, PNorm = 142.9398, GNorm = 0.4805, lr_0 = 4.1167e-04
Loss = 8.5193e-03, PNorm = 142.9586, GNorm = 0.0982, lr_0 = 4.1139e-04
Loss = 6.8489e-03, PNorm = 142.9782, GNorm = 0.2352, lr_0 = 4.1111e-04
Loss = 8.1926e-03, PNorm = 142.9974, GNorm = 0.2756, lr_0 = 4.1083e-04
Loss = 8.8874e-03, PNorm = 143.0179, GNorm = 0.1846, lr_0 = 4.1055e-04
Loss = 9.3629e-03, PNorm = 143.0382, GNorm = 0.2192, lr_0 = 4.1027e-04
Loss = 7.8775e-03, PNorm = 143.0609, GNorm = 0.2227, lr_0 = 4.0998e-04
Loss = 8.1227e-03, PNorm = 143.0846, GNorm = 0.1241, lr_0 = 4.0970e-04
Loss = 8.6836e-03, PNorm = 143.1099, GNorm = 0.2039, lr_0 = 4.0942e-04
Loss = 8.4431e-03, PNorm = 143.1305, GNorm = 0.1580, lr_0 = 4.0914e-04
Loss = 7.9658e-03, PNorm = 143.1475, GNorm = 0.5891, lr_0 = 4.0886e-04
Loss = 9.0794e-03, PNorm = 143.1683, GNorm = 0.1474, lr_0 = 4.0858e-04
Loss = 8.0431e-03, PNorm = 143.1886, GNorm = 0.4530, lr_0 = 4.0830e-04
Loss = 9.2978e-03, PNorm = 143.2073, GNorm = 0.2506, lr_0 = 4.0802e-04
Loss = 8.3988e-03, PNorm = 143.2251, GNorm = 0.3883, lr_0 = 4.0774e-04
Loss = 8.7995e-03, PNorm = 143.2492, GNorm = 0.4419, lr_0 = 4.0746e-04
Loss = 8.8527e-03, PNorm = 143.2670, GNorm = 0.6239, lr_0 = 4.0718e-04
Loss = 9.5540e-03, PNorm = 143.2854, GNorm = 0.3710, lr_0 = 4.0691e-04
Loss = 8.7224e-03, PNorm = 143.3013, GNorm = 0.1196, lr_0 = 4.0663e-04
Loss = 1.0220e-02, PNorm = 143.3170, GNorm = 0.1793, lr_0 = 4.0635e-04
Loss = 8.0120e-03, PNorm = 143.3350, GNorm = 0.4615, lr_0 = 4.0607e-04
Loss = 9.1954e-03, PNorm = 143.3582, GNorm = 0.3008, lr_0 = 4.0579e-04
Loss = 1.0459e-02, PNorm = 143.3795, GNorm = 0.2583, lr_0 = 4.0551e-04
Loss = 1.2967e-02, PNorm = 143.4079, GNorm = 0.3820, lr_0 = 4.0524e-04
Loss = 1.0917e-02, PNorm = 143.4327, GNorm = 0.1221, lr_0 = 4.0496e-04
Loss = 8.7725e-03, PNorm = 143.4550, GNorm = 0.1831, lr_0 = 4.0468e-04
Validation mae = 0.483677
Epoch 13
Loss = 9.3572e-03, PNorm = 143.4757, GNorm = 0.2410, lr_0 = 4.0440e-04
Loss = 1.0488e-02, PNorm = 143.4929, GNorm = 0.6107, lr_0 = 4.0413e-04
Loss = 7.1062e-03, PNorm = 143.5018, GNorm = 0.3387, lr_0 = 4.0385e-04
Loss = 6.9403e-03, PNorm = 143.5116, GNorm = 0.2145, lr_0 = 4.0357e-04
Loss = 8.1119e-03, PNorm = 143.5269, GNorm = 0.2084, lr_0 = 4.0330e-04
Loss = 7.0099e-03, PNorm = 143.5413, GNorm = 0.1522, lr_0 = 4.0302e-04
Loss = 7.7192e-03, PNorm = 143.5587, GNorm = 0.2681, lr_0 = 4.0274e-04
Loss = 6.6357e-03, PNorm = 143.5712, GNorm = 0.1387, lr_0 = 4.0247e-04
Loss = 7.3908e-03, PNorm = 143.5829, GNorm = 0.1769, lr_0 = 4.0219e-04
Loss = 8.4423e-03, PNorm = 143.5954, GNorm = 0.1525, lr_0 = 4.0192e-04
Loss = 7.2170e-03, PNorm = 143.6081, GNorm = 0.2338, lr_0 = 4.0164e-04
Loss = 7.7866e-03, PNorm = 143.6248, GNorm = 0.1697, lr_0 = 4.0137e-04
Loss = 8.6969e-03, PNorm = 143.6401, GNorm = 0.3112, lr_0 = 4.0109e-04
Loss = 7.4928e-03, PNorm = 143.6555, GNorm = 0.3363, lr_0 = 4.0082e-04
Loss = 8.8876e-03, PNorm = 143.6704, GNorm = 0.0985, lr_0 = 4.0054e-04
Loss = 1.0007e-02, PNorm = 143.6869, GNorm = 0.2323, lr_0 = 4.0027e-04
Loss = 9.2258e-03, PNorm = 143.7052, GNorm = 0.1871, lr_0 = 3.9999e-04
Loss = 7.8813e-03, PNorm = 143.7228, GNorm = 0.2837, lr_0 = 3.9972e-04
Loss = 1.0086e-02, PNorm = 143.7388, GNorm = 0.1065, lr_0 = 3.9945e-04
Loss = 6.8052e-03, PNorm = 143.7534, GNorm = 0.2856, lr_0 = 3.9917e-04
Loss = 7.2901e-03, PNorm = 143.7664, GNorm = 0.2278, lr_0 = 3.9890e-04
Loss = 7.5555e-03, PNorm = 143.7769, GNorm = 0.4362, lr_0 = 3.9863e-04
Loss = 7.8190e-03, PNorm = 143.7959, GNorm = 0.2420, lr_0 = 3.9835e-04
Loss = 8.1958e-03, PNorm = 143.8135, GNorm = 0.2164, lr_0 = 3.9808e-04
Loss = 7.4510e-03, PNorm = 143.8252, GNorm = 0.2442, lr_0 = 3.9781e-04
Loss = 9.0208e-03, PNorm = 143.8350, GNorm = 0.4115, lr_0 = 3.9753e-04
Loss = 6.7267e-03, PNorm = 143.8465, GNorm = 0.4213, lr_0 = 3.9726e-04
Loss = 7.2561e-03, PNorm = 143.8635, GNorm = 0.1384, lr_0 = 3.9699e-04
Loss = 6.0891e-03, PNorm = 143.8827, GNorm = 0.1339, lr_0 = 3.9672e-04
Loss = 6.8629e-03, PNorm = 143.8989, GNorm = 0.1697, lr_0 = 3.9645e-04
Loss = 6.9056e-03, PNorm = 143.9179, GNorm = 0.0989, lr_0 = 3.9617e-04
Loss = 6.4359e-03, PNorm = 143.9348, GNorm = 0.1445, lr_0 = 3.9590e-04
Loss = 7.0657e-03, PNorm = 143.9518, GNorm = 0.2700, lr_0 = 3.9563e-04
Loss = 6.3050e-03, PNorm = 143.9668, GNorm = 0.2282, lr_0 = 3.9536e-04
Loss = 6.5113e-03, PNorm = 143.9804, GNorm = 0.1064, lr_0 = 3.9509e-04
Loss = 7.1129e-03, PNorm = 143.9925, GNorm = 0.1629, lr_0 = 3.9482e-04
Loss = 6.2931e-03, PNorm = 144.0061, GNorm = 0.0687, lr_0 = 3.9455e-04
Loss = 6.3743e-03, PNorm = 144.0197, GNorm = 0.2462, lr_0 = 3.9428e-04
Loss = 8.4915e-03, PNorm = 144.0324, GNorm = 0.1802, lr_0 = 3.9401e-04
Loss = 8.4509e-03, PNorm = 144.0477, GNorm = 0.1817, lr_0 = 3.9374e-04
Loss = 7.0909e-03, PNorm = 144.0624, GNorm = 0.3035, lr_0 = 3.9347e-04
Loss = 6.5887e-03, PNorm = 144.0817, GNorm = 0.1067, lr_0 = 3.9320e-04
Loss = 5.8858e-03, PNorm = 144.0988, GNorm = 0.1592, lr_0 = 3.9293e-04
Loss = 8.7884e-03, PNorm = 144.1138, GNorm = 0.0919, lr_0 = 3.9266e-04
Loss = 6.5491e-03, PNorm = 144.1290, GNorm = 0.1959, lr_0 = 3.9239e-04
Loss = 7.1008e-03, PNorm = 144.1468, GNorm = 0.1857, lr_0 = 3.9212e-04
Loss = 6.5217e-03, PNorm = 144.1645, GNorm = 0.1535, lr_0 = 3.9185e-04
Loss = 7.4989e-03, PNorm = 144.1773, GNorm = 0.3116, lr_0 = 3.9159e-04
Loss = 5.8802e-03, PNorm = 144.1943, GNorm = 0.2621, lr_0 = 3.9132e-04
Loss = 6.1871e-03, PNorm = 144.2064, GNorm = 0.1973, lr_0 = 3.9105e-04
Loss = 7.3032e-03, PNorm = 144.2174, GNorm = 0.1332, lr_0 = 3.9078e-04
Loss = 6.5111e-03, PNorm = 144.2298, GNorm = 0.0726, lr_0 = 3.9051e-04
Loss = 6.2520e-03, PNorm = 144.2452, GNorm = 0.3285, lr_0 = 3.9025e-04
Loss = 6.2453e-03, PNorm = 144.2622, GNorm = 0.2225, lr_0 = 3.8998e-04
Loss = 7.7814e-03, PNorm = 144.2767, GNorm = 0.2615, lr_0 = 3.8971e-04
Loss = 6.2536e-03, PNorm = 144.2902, GNorm = 0.2610, lr_0 = 3.8945e-04
Loss = 6.4293e-03, PNorm = 144.3055, GNorm = 0.3171, lr_0 = 3.8918e-04
Loss = 6.1949e-03, PNorm = 144.3177, GNorm = 0.4785, lr_0 = 3.8891e-04
Loss = 7.5372e-03, PNorm = 144.3313, GNorm = 0.3149, lr_0 = 3.8865e-04
Loss = 6.0595e-03, PNorm = 144.3455, GNorm = 0.1880, lr_0 = 3.8838e-04
Loss = 9.4439e-03, PNorm = 144.3586, GNorm = 0.3399, lr_0 = 3.8811e-04
Loss = 6.8502e-03, PNorm = 144.3699, GNorm = 0.1639, lr_0 = 3.8785e-04
Loss = 7.0571e-03, PNorm = 144.3835, GNorm = 0.3655, lr_0 = 3.8758e-04
Loss = 8.1049e-03, PNorm = 144.4052, GNorm = 0.4475, lr_0 = 3.8732e-04
Loss = 7.2317e-03, PNorm = 144.4249, GNorm = 0.3873, lr_0 = 3.8705e-04
Loss = 7.2592e-03, PNorm = 144.4399, GNorm = 0.0965, lr_0 = 3.8679e-04
Loss = 6.0505e-03, PNorm = 144.4540, GNorm = 0.3713, lr_0 = 3.8652e-04
Loss = 7.3013e-03, PNorm = 144.4692, GNorm = 0.1745, lr_0 = 3.8626e-04
Loss = 6.4473e-03, PNorm = 144.4890, GNorm = 0.0959, lr_0 = 3.8599e-04
Loss = 7.3703e-03, PNorm = 144.5084, GNorm = 0.2365, lr_0 = 3.8573e-04
Loss = 8.8256e-03, PNorm = 144.5261, GNorm = 0.3134, lr_0 = 3.8546e-04
Loss = 5.5547e-03, PNorm = 144.5416, GNorm = 0.2238, lr_0 = 3.8520e-04
Loss = 8.5263e-03, PNorm = 144.5586, GNorm = 0.1360, lr_0 = 3.8493e-04
Loss = 7.6613e-03, PNorm = 144.5756, GNorm = 0.3884, lr_0 = 3.8467e-04
Loss = 7.4405e-03, PNorm = 144.5953, GNorm = 0.1995, lr_0 = 3.8441e-04
Loss = 6.9997e-03, PNorm = 144.6158, GNorm = 0.2218, lr_0 = 3.8414e-04
Loss = 7.0609e-03, PNorm = 144.6319, GNorm = 0.1341, lr_0 = 3.8388e-04
Loss = 7.1252e-03, PNorm = 144.6458, GNorm = 0.2704, lr_0 = 3.8362e-04
Loss = 6.9287e-03, PNorm = 144.6588, GNorm = 0.1225, lr_0 = 3.8336e-04
Loss = 6.2425e-03, PNorm = 144.6745, GNorm = 0.2446, lr_0 = 3.8309e-04
Loss = 7.5270e-03, PNorm = 144.6925, GNorm = 0.1453, lr_0 = 3.8283e-04
Loss = 7.9334e-03, PNorm = 144.7090, GNorm = 0.3388, lr_0 = 3.8257e-04
Loss = 7.0270e-03, PNorm = 144.7234, GNorm = 0.4776, lr_0 = 3.8231e-04
Loss = 7.5501e-03, PNorm = 144.7371, GNorm = 0.3162, lr_0 = 3.8204e-04
Loss = 9.6486e-03, PNorm = 144.7520, GNorm = 0.3237, lr_0 = 3.8178e-04
Loss = 8.9344e-03, PNorm = 144.7699, GNorm = 0.1906, lr_0 = 3.8152e-04
Loss = 1.0936e-02, PNorm = 144.7935, GNorm = 0.2194, lr_0 = 3.8126e-04
Loss = 9.2105e-03, PNorm = 144.8159, GNorm = 0.2767, lr_0 = 3.8100e-04
Loss = 6.6240e-03, PNorm = 144.8346, GNorm = 0.2606, lr_0 = 3.8074e-04
Loss = 7.7210e-03, PNorm = 144.8545, GNorm = 0.5665, lr_0 = 3.8048e-04
Loss = 6.7903e-03, PNorm = 144.8758, GNorm = 0.1258, lr_0 = 3.8022e-04
Loss = 5.8377e-03, PNorm = 144.8935, GNorm = 0.1876, lr_0 = 3.7995e-04
Loss = 7.9863e-03, PNorm = 144.9071, GNorm = 0.1553, lr_0 = 3.7969e-04
Loss = 7.5248e-03, PNorm = 144.9196, GNorm = 0.3224, lr_0 = 3.7943e-04
Loss = 8.3178e-03, PNorm = 144.9348, GNorm = 0.1979, lr_0 = 3.7917e-04
Loss = 8.0005e-03, PNorm = 144.9566, GNorm = 0.4647, lr_0 = 3.7891e-04
Loss = 7.1884e-03, PNorm = 144.9784, GNorm = 0.1904, lr_0 = 3.7866e-04
Loss = 6.9931e-03, PNorm = 144.9979, GNorm = 0.2072, lr_0 = 3.7840e-04
Loss = 6.7874e-03, PNorm = 145.0178, GNorm = 0.2343, lr_0 = 3.7814e-04
Loss = 6.7494e-03, PNorm = 145.0352, GNorm = 0.1716, lr_0 = 3.7788e-04
Loss = 6.3738e-03, PNorm = 145.0514, GNorm = 0.1721, lr_0 = 3.7762e-04
Loss = 6.9182e-03, PNorm = 145.0674, GNorm = 0.3008, lr_0 = 3.7736e-04
Loss = 6.9800e-03, PNorm = 145.0879, GNorm = 0.1976, lr_0 = 3.7710e-04
Loss = 7.0524e-03, PNorm = 145.1018, GNorm = 0.1133, lr_0 = 3.7684e-04
Loss = 8.2766e-03, PNorm = 145.1200, GNorm = 0.2065, lr_0 = 3.7659e-04
Loss = 8.9858e-03, PNorm = 145.1401, GNorm = 0.2564, lr_0 = 3.7633e-04
Loss = 7.5381e-03, PNorm = 145.1597, GNorm = 0.2411, lr_0 = 3.7607e-04
Loss = 8.4146e-03, PNorm = 145.1758, GNorm = 0.5504, lr_0 = 3.7581e-04
Loss = 5.6770e-03, PNorm = 145.1953, GNorm = 0.1010, lr_0 = 3.7555e-04
Loss = 6.2516e-03, PNorm = 145.2129, GNorm = 0.4753, lr_0 = 3.7530e-04
Loss = 7.0130e-03, PNorm = 145.2283, GNorm = 0.2563, lr_0 = 3.7504e-04
Loss = 8.5363e-03, PNorm = 145.2448, GNorm = 0.4681, lr_0 = 3.7478e-04
Loss = 5.7901e-03, PNorm = 145.2612, GNorm = 0.2019, lr_0 = 3.7453e-04
Loss = 7.1026e-03, PNorm = 145.2773, GNorm = 0.1009, lr_0 = 3.7427e-04
Loss = 6.4771e-03, PNorm = 145.2948, GNorm = 0.1035, lr_0 = 3.7401e-04
Loss = 6.2428e-03, PNorm = 145.3112, GNorm = 0.1057, lr_0 = 3.7376e-04
Loss = 6.9084e-03, PNorm = 145.3263, GNorm = 0.1536, lr_0 = 3.7350e-04
Loss = 5.8036e-03, PNorm = 145.3447, GNorm = 0.1559, lr_0 = 3.7325e-04
Loss = 6.1298e-03, PNorm = 145.3614, GNorm = 0.3109, lr_0 = 3.7299e-04
Loss = 6.9957e-03, PNorm = 145.3809, GNorm = 0.1571, lr_0 = 3.7273e-04
Validation mae = 0.478739
Epoch 14
Loss = 6.7233e-03, PNorm = 145.3969, GNorm = 0.2594, lr_0 = 3.7248e-04
Loss = 6.7339e-03, PNorm = 145.4119, GNorm = 0.2610, lr_0 = 3.7222e-04
Loss = 6.1828e-03, PNorm = 145.4194, GNorm = 0.1169, lr_0 = 3.7197e-04
Loss = 6.0648e-03, PNorm = 145.4324, GNorm = 0.1625, lr_0 = 3.7171e-04
Loss = 5.7042e-03, PNorm = 145.4440, GNorm = 0.5753, lr_0 = 3.7146e-04
Loss = 7.1736e-03, PNorm = 145.4552, GNorm = 0.1472, lr_0 = 3.7120e-04
Loss = 5.9340e-03, PNorm = 145.4692, GNorm = 0.1573, lr_0 = 3.7095e-04
Loss = 7.8483e-03, PNorm = 145.4818, GNorm = 0.1462, lr_0 = 3.7070e-04
Loss = 6.3022e-03, PNorm = 145.4909, GNorm = 0.3408, lr_0 = 3.7044e-04
Loss = 5.7694e-03, PNorm = 145.5027, GNorm = 0.3698, lr_0 = 3.7019e-04
Loss = 6.1667e-03, PNorm = 145.5145, GNorm = 0.3005, lr_0 = 3.6993e-04
Loss = 8.0858e-03, PNorm = 145.5254, GNorm = 0.2151, lr_0 = 3.6968e-04
Loss = 5.0072e-03, PNorm = 145.5383, GNorm = 0.0986, lr_0 = 3.6943e-04
Loss = 5.8109e-03, PNorm = 145.5476, GNorm = 0.1425, lr_0 = 3.6917e-04
Loss = 6.1445e-03, PNorm = 145.5617, GNorm = 0.1738, lr_0 = 3.6892e-04
Loss = 7.0791e-03, PNorm = 145.5754, GNorm = 0.1298, lr_0 = 3.6867e-04
Loss = 5.4694e-03, PNorm = 145.5914, GNorm = 0.5160, lr_0 = 3.6842e-04
Loss = 6.6562e-03, PNorm = 145.6022, GNorm = 0.7346, lr_0 = 3.6816e-04
Loss = 6.2870e-03, PNorm = 145.6161, GNorm = 0.2255, lr_0 = 3.6791e-04
Loss = 7.9785e-03, PNorm = 145.6286, GNorm = 0.3959, lr_0 = 3.6766e-04
Loss = 5.8400e-03, PNorm = 145.6399, GNorm = 0.0705, lr_0 = 3.6741e-04
Loss = 6.1677e-03, PNorm = 145.6569, GNorm = 0.1633, lr_0 = 3.6716e-04
Loss = 7.0414e-03, PNorm = 145.6713, GNorm = 0.3704, lr_0 = 3.6690e-04
Loss = 6.7723e-03, PNorm = 145.6814, GNorm = 0.2556, lr_0 = 3.6665e-04
Loss = 5.9665e-03, PNorm = 145.6925, GNorm = 0.0729, lr_0 = 3.6640e-04
Loss = 8.4021e-03, PNorm = 145.7033, GNorm = 0.3077, lr_0 = 3.6615e-04
Loss = 6.3253e-03, PNorm = 145.7140, GNorm = 0.1518, lr_0 = 3.6590e-04
Loss = 5.5788e-03, PNorm = 145.7262, GNorm = 0.2820, lr_0 = 3.6565e-04
Loss = 6.8186e-03, PNorm = 145.7379, GNorm = 0.4819, lr_0 = 3.6540e-04
Loss = 6.1652e-03, PNorm = 145.7498, GNorm = 0.1441, lr_0 = 3.6515e-04
Loss = 5.3836e-03, PNorm = 145.7623, GNorm = 0.3888, lr_0 = 3.6490e-04
Loss = 5.6925e-03, PNorm = 145.7728, GNorm = 0.2525, lr_0 = 3.6465e-04
Loss = 5.8412e-03, PNorm = 145.7860, GNorm = 0.1689, lr_0 = 3.6440e-04
Loss = 6.1395e-03, PNorm = 145.8018, GNorm = 0.0979, lr_0 = 3.6415e-04
Loss = 5.4471e-03, PNorm = 145.8170, GNorm = 0.0825, lr_0 = 3.6390e-04
Loss = 5.5634e-03, PNorm = 145.8342, GNorm = 0.2091, lr_0 = 3.6365e-04
Loss = 9.9855e-03, PNorm = 145.8497, GNorm = 0.1367, lr_0 = 3.6340e-04
Loss = 6.3338e-03, PNorm = 145.8650, GNorm = 0.2650, lr_0 = 3.6315e-04
Loss = 5.3819e-03, PNorm = 145.8778, GNorm = 0.3049, lr_0 = 3.6290e-04
Loss = 4.7806e-03, PNorm = 145.8924, GNorm = 0.1384, lr_0 = 3.6266e-04
Loss = 5.8263e-03, PNorm = 145.9022, GNorm = 0.1867, lr_0 = 3.6241e-04
Loss = 6.6284e-03, PNorm = 145.9175, GNorm = 0.4422, lr_0 = 3.6216e-04
Loss = 6.3486e-03, PNorm = 145.9317, GNorm = 0.0764, lr_0 = 3.6191e-04
Loss = 5.4961e-03, PNorm = 145.9411, GNorm = 0.1734, lr_0 = 3.6166e-04
Loss = 6.6354e-03, PNorm = 145.9538, GNorm = 0.1631, lr_0 = 3.6141e-04
Loss = 4.9790e-03, PNorm = 145.9632, GNorm = 0.1965, lr_0 = 3.6117e-04
Loss = 6.0461e-03, PNorm = 145.9739, GNorm = 0.3077, lr_0 = 3.6092e-04
Loss = 7.8074e-03, PNorm = 145.9885, GNorm = 0.1417, lr_0 = 3.6067e-04
Loss = 5.2129e-03, PNorm = 146.0074, GNorm = 0.0642, lr_0 = 3.6043e-04
Loss = 5.5852e-03, PNorm = 146.0195, GNorm = 0.3111, lr_0 = 3.6018e-04
Loss = 5.2931e-03, PNorm = 146.0330, GNorm = 0.3254, lr_0 = 3.5993e-04
Loss = 6.5467e-03, PNorm = 146.0454, GNorm = 0.0767, lr_0 = 3.5969e-04
Loss = 6.8790e-03, PNorm = 146.0596, GNorm = 0.1583, lr_0 = 3.5944e-04
Loss = 5.9345e-03, PNorm = 146.0764, GNorm = 0.2893, lr_0 = 3.5919e-04
Loss = 7.9275e-03, PNorm = 146.0904, GNorm = 0.3001, lr_0 = 3.5895e-04
Loss = 6.3472e-03, PNorm = 146.1015, GNorm = 0.2612, lr_0 = 3.5870e-04
Loss = 5.5076e-03, PNorm = 146.1173, GNorm = 0.3033, lr_0 = 3.5845e-04
Loss = 5.6521e-03, PNorm = 146.1282, GNorm = 0.2808, lr_0 = 3.5821e-04
Loss = 5.2197e-03, PNorm = 146.1402, GNorm = 0.3705, lr_0 = 3.5796e-04
Loss = 7.0895e-03, PNorm = 146.1544, GNorm = 0.1537, lr_0 = 3.5772e-04
Loss = 6.4033e-03, PNorm = 146.1662, GNorm = 0.1677, lr_0 = 3.5747e-04
Loss = 5.5381e-03, PNorm = 146.1818, GNorm = 0.3782, lr_0 = 3.5723e-04
Loss = 6.7528e-03, PNorm = 146.1928, GNorm = 0.4653, lr_0 = 3.5698e-04
Loss = 6.8239e-03, PNorm = 146.2069, GNorm = 0.3443, lr_0 = 3.5674e-04
Loss = 5.5304e-03, PNorm = 146.2205, GNorm = 0.4160, lr_0 = 3.5650e-04
Loss = 5.3875e-03, PNorm = 146.2332, GNorm = 0.0713, lr_0 = 3.5625e-04
Loss = 5.7527e-03, PNorm = 146.2458, GNorm = 0.1916, lr_0 = 3.5601e-04
Loss = 5.4748e-03, PNorm = 146.2600, GNorm = 0.1358, lr_0 = 3.5576e-04
Loss = 5.3286e-03, PNorm = 146.2757, GNorm = 0.1126, lr_0 = 3.5552e-04
Loss = 7.2389e-03, PNorm = 146.2907, GNorm = 0.1213, lr_0 = 3.5528e-04
Loss = 6.6751e-03, PNorm = 146.3021, GNorm = 0.1843, lr_0 = 3.5503e-04
Loss = 6.2481e-03, PNorm = 146.3137, GNorm = 0.4733, lr_0 = 3.5479e-04
Loss = 5.9947e-03, PNorm = 146.3296, GNorm = 0.1066, lr_0 = 3.5455e-04
Loss = 5.6650e-03, PNorm = 146.3468, GNorm = 0.3280, lr_0 = 3.5430e-04
Loss = 5.8570e-03, PNorm = 146.3595, GNorm = 0.1711, lr_0 = 3.5406e-04
Loss = 5.2718e-03, PNorm = 146.3716, GNorm = 0.2305, lr_0 = 3.5382e-04
Loss = 4.6052e-03, PNorm = 146.3818, GNorm = 0.2060, lr_0 = 3.5358e-04
Loss = 8.2322e-03, PNorm = 146.3888, GNorm = 0.1896, lr_0 = 3.5333e-04
Loss = 5.1154e-03, PNorm = 146.4021, GNorm = 0.2969, lr_0 = 3.5309e-04
Loss = 6.3344e-03, PNorm = 146.4162, GNorm = 0.1234, lr_0 = 3.5285e-04
Loss = 6.3242e-03, PNorm = 146.4306, GNorm = 0.2187, lr_0 = 3.5261e-04
Loss = 7.7370e-03, PNorm = 146.4482, GNorm = 0.1900, lr_0 = 3.5237e-04
Loss = 6.7366e-03, PNorm = 146.4651, GNorm = 0.1930, lr_0 = 3.5212e-04
Loss = 7.3977e-03, PNorm = 146.4810, GNorm = 0.1892, lr_0 = 3.5188e-04
Loss = 5.4649e-03, PNorm = 146.4937, GNorm = 0.3560, lr_0 = 3.5164e-04
Loss = 5.0393e-03, PNorm = 146.5055, GNorm = 0.1868, lr_0 = 3.5140e-04
Loss = 5.9356e-03, PNorm = 146.5228, GNorm = 0.1491, lr_0 = 3.5116e-04
Loss = 5.2537e-03, PNorm = 146.5423, GNorm = 0.2380, lr_0 = 3.5092e-04
Loss = 5.0577e-03, PNorm = 146.5571, GNorm = 0.1851, lr_0 = 3.5068e-04
Loss = 6.0201e-03, PNorm = 146.5688, GNorm = 0.2399, lr_0 = 3.5044e-04
Loss = 5.7889e-03, PNorm = 146.5804, GNorm = 0.2669, lr_0 = 3.5020e-04
Loss = 8.6514e-03, PNorm = 146.5943, GNorm = 0.1626, lr_0 = 3.4996e-04
Loss = 5.9385e-03, PNorm = 146.6052, GNorm = 0.2226, lr_0 = 3.4972e-04
Loss = 5.0221e-03, PNorm = 146.6149, GNorm = 0.2277, lr_0 = 3.4948e-04
Loss = 5.4661e-03, PNorm = 146.6312, GNorm = 0.3586, lr_0 = 3.4924e-04
Loss = 5.6782e-03, PNorm = 146.6424, GNorm = 0.2091, lr_0 = 3.4900e-04
Loss = 4.8158e-03, PNorm = 146.6571, GNorm = 0.1696, lr_0 = 3.4876e-04
Loss = 6.0942e-03, PNorm = 146.6757, GNorm = 0.1137, lr_0 = 3.4852e-04
Loss = 5.9536e-03, PNorm = 146.6895, GNorm = 0.1667, lr_0 = 3.4828e-04
Loss = 6.9309e-03, PNorm = 146.7024, GNorm = 0.1676, lr_0 = 3.4805e-04
Loss = 1.0543e-02, PNorm = 146.7133, GNorm = 0.3118, lr_0 = 3.4781e-04
Loss = 6.3681e-03, PNorm = 146.7272, GNorm = 0.0593, lr_0 = 3.4757e-04
Loss = 7.5993e-03, PNorm = 146.7425, GNorm = 0.3353, lr_0 = 3.4733e-04
Loss = 5.9491e-03, PNorm = 146.7550, GNorm = 0.1399, lr_0 = 3.4709e-04
Loss = 6.3126e-03, PNorm = 146.7710, GNorm = 0.1890, lr_0 = 3.4686e-04
Loss = 5.9885e-03, PNorm = 146.7864, GNorm = 0.2116, lr_0 = 3.4662e-04
Loss = 8.6125e-03, PNorm = 146.7999, GNorm = 0.1065, lr_0 = 3.4638e-04
Loss = 5.9889e-03, PNorm = 146.8129, GNorm = 0.2903, lr_0 = 3.4614e-04
Loss = 6.2334e-03, PNorm = 146.8287, GNorm = 0.0731, lr_0 = 3.4591e-04
Loss = 5.9515e-03, PNorm = 146.8416, GNorm = 0.3225, lr_0 = 3.4567e-04
Loss = 7.7652e-03, PNorm = 146.8537, GNorm = 0.2450, lr_0 = 3.4543e-04
Loss = 6.4086e-03, PNorm = 146.8681, GNorm = 0.7093, lr_0 = 3.4520e-04
Loss = 5.3335e-03, PNorm = 146.8854, GNorm = 0.0913, lr_0 = 3.4496e-04
Loss = 5.6944e-03, PNorm = 146.9011, GNorm = 0.3524, lr_0 = 3.4472e-04
Loss = 5.7908e-03, PNorm = 146.9146, GNorm = 0.1872, lr_0 = 3.4449e-04
Loss = 8.9618e-03, PNorm = 146.9289, GNorm = 0.1883, lr_0 = 3.4425e-04
Loss = 5.6735e-03, PNorm = 146.9475, GNorm = 0.2888, lr_0 = 3.4402e-04
Loss = 7.4407e-03, PNorm = 146.9630, GNorm = 0.0902, lr_0 = 3.4378e-04
Loss = 7.6916e-03, PNorm = 146.9806, GNorm = 0.3130, lr_0 = 3.4354e-04
Loss = 5.7165e-03, PNorm = 146.9954, GNorm = 0.1029, lr_0 = 3.4331e-04
Validation mae = 0.480644
Epoch 15
Loss = 4.6951e-03, PNorm = 147.0061, GNorm = 0.2715, lr_0 = 3.4307e-04
Loss = 5.2226e-03, PNorm = 147.0132, GNorm = 0.0945, lr_0 = 3.4284e-04
Loss = 5.8926e-03, PNorm = 147.0190, GNorm = 0.3703, lr_0 = 3.4260e-04
Loss = 4.7613e-03, PNorm = 147.0277, GNorm = 0.1364, lr_0 = 3.4237e-04
Loss = 4.6176e-03, PNorm = 147.0360, GNorm = 0.0920, lr_0 = 3.4213e-04
Loss = 4.5192e-03, PNorm = 147.0454, GNorm = 0.0945, lr_0 = 3.4190e-04
Loss = 5.2875e-03, PNorm = 147.0518, GNorm = 0.3930, lr_0 = 3.4167e-04
Loss = 5.2107e-03, PNorm = 147.0645, GNorm = 0.1747, lr_0 = 3.4143e-04
Loss = 5.6921e-03, PNorm = 147.0773, GNorm = 0.1962, lr_0 = 3.4120e-04
Loss = 5.4645e-03, PNorm = 147.0889, GNorm = 0.3182, lr_0 = 3.4096e-04
Loss = 6.6235e-03, PNorm = 147.0972, GNorm = 0.1732, lr_0 = 3.4073e-04
Loss = 6.1807e-03, PNorm = 147.1082, GNorm = 0.1550, lr_0 = 3.4050e-04
Loss = 4.9970e-03, PNorm = 147.1187, GNorm = 0.2704, lr_0 = 3.4026e-04
Loss = 4.5021e-03, PNorm = 147.1285, GNorm = 0.2400, lr_0 = 3.4003e-04
Loss = 4.8034e-03, PNorm = 147.1393, GNorm = 0.2178, lr_0 = 3.3980e-04
Loss = 5.7865e-03, PNorm = 147.1504, GNorm = 0.2459, lr_0 = 3.3956e-04
Loss = 4.1619e-03, PNorm = 147.1568, GNorm = 0.1245, lr_0 = 3.3933e-04
Loss = 4.8101e-03, PNorm = 147.1664, GNorm = 0.2768, lr_0 = 3.3910e-04
Loss = 5.9480e-03, PNorm = 147.1769, GNorm = 0.1077, lr_0 = 3.3887e-04
Loss = 3.9069e-03, PNorm = 147.1866, GNorm = 0.0950, lr_0 = 3.3864e-04
Loss = 4.5244e-03, PNorm = 147.1991, GNorm = 0.1177, lr_0 = 3.3840e-04
Loss = 6.4909e-03, PNorm = 147.2073, GNorm = 0.1619, lr_0 = 3.3817e-04
Loss = 5.3011e-03, PNorm = 147.2177, GNorm = 0.0856, lr_0 = 3.3794e-04
Loss = 4.9470e-03, PNorm = 147.2295, GNorm = 0.1454, lr_0 = 3.3771e-04
Loss = 4.6742e-03, PNorm = 147.2423, GNorm = 0.1591, lr_0 = 3.3748e-04
Loss = 6.3404e-03, PNorm = 147.2521, GNorm = 0.3461, lr_0 = 3.3725e-04
Loss = 9.0459e-03, PNorm = 147.2643, GNorm = 0.4336, lr_0 = 3.3701e-04
Loss = 5.5123e-03, PNorm = 147.2741, GNorm = 0.3305, lr_0 = 3.3678e-04
Loss = 5.3763e-03, PNorm = 147.2833, GNorm = 0.3762, lr_0 = 3.3655e-04
Loss = 4.2945e-03, PNorm = 147.2934, GNorm = 0.3561, lr_0 = 3.3632e-04
Loss = 5.2214e-03, PNorm = 147.3066, GNorm = 0.0936, lr_0 = 3.3609e-04
Loss = 4.9582e-03, PNorm = 147.3174, GNorm = 0.1836, lr_0 = 3.3586e-04
Loss = 5.5269e-03, PNorm = 147.3259, GNorm = 0.1363, lr_0 = 3.3563e-04
Loss = 5.1827e-03, PNorm = 147.3365, GNorm = 0.2078, lr_0 = 3.3540e-04
Loss = 6.8687e-03, PNorm = 147.3439, GNorm = 0.2862, lr_0 = 3.3517e-04
Loss = 5.5764e-03, PNorm = 147.3549, GNorm = 0.1864, lr_0 = 3.3494e-04
Loss = 5.2040e-03, PNorm = 147.3656, GNorm = 0.1334, lr_0 = 3.3471e-04
Loss = 5.4940e-03, PNorm = 147.3745, GNorm = 0.0948, lr_0 = 3.3448e-04
Loss = 5.3270e-03, PNorm = 147.3870, GNorm = 0.1335, lr_0 = 3.3425e-04
Loss = 6.3039e-03, PNorm = 147.4025, GNorm = 0.3656, lr_0 = 3.3403e-04
Loss = 5.0044e-03, PNorm = 147.4129, GNorm = 0.4159, lr_0 = 3.3380e-04
Loss = 4.7738e-03, PNorm = 147.4262, GNorm = 0.3457, lr_0 = 3.3357e-04
Loss = 5.0187e-03, PNorm = 147.4354, GNorm = 0.1575, lr_0 = 3.3334e-04
Loss = 4.9993e-03, PNorm = 147.4469, GNorm = 0.1573, lr_0 = 3.3311e-04
Loss = 4.7067e-03, PNorm = 147.4596, GNorm = 0.2417, lr_0 = 3.3288e-04
Loss = 5.0551e-03, PNorm = 147.4677, GNorm = 0.2544, lr_0 = 3.3265e-04
Loss = 4.7744e-03, PNorm = 147.4785, GNorm = 0.3378, lr_0 = 3.3243e-04
Loss = 3.9450e-03, PNorm = 147.4914, GNorm = 0.2215, lr_0 = 3.3220e-04
Loss = 5.0385e-03, PNorm = 147.4994, GNorm = 0.2301, lr_0 = 3.3197e-04
Loss = 3.9746e-03, PNorm = 147.5109, GNorm = 0.1100, lr_0 = 3.3174e-04
Loss = 4.5293e-03, PNorm = 147.5258, GNorm = 0.2002, lr_0 = 3.3152e-04
Loss = 4.8414e-03, PNorm = 147.5360, GNorm = 0.2019, lr_0 = 3.3129e-04
Loss = 4.1281e-03, PNorm = 147.5451, GNorm = 0.2000, lr_0 = 3.3106e-04
Loss = 4.9989e-03, PNorm = 147.5565, GNorm = 0.3521, lr_0 = 3.3084e-04
Loss = 6.1287e-03, PNorm = 147.5683, GNorm = 0.3722, lr_0 = 3.3061e-04
Loss = 7.4622e-03, PNorm = 147.5774, GNorm = 0.4643, lr_0 = 3.3038e-04
Loss = 7.1781e-03, PNorm = 147.5903, GNorm = 0.0638, lr_0 = 3.3016e-04
Loss = 5.8282e-03, PNorm = 147.6014, GNorm = 0.3679, lr_0 = 3.2993e-04
Loss = 4.3880e-03, PNorm = 147.6078, GNorm = 0.1918, lr_0 = 3.2970e-04
Loss = 5.7319e-03, PNorm = 147.6172, GNorm = 0.0843, lr_0 = 3.2948e-04
Loss = 4.6491e-03, PNorm = 147.6313, GNorm = 0.2377, lr_0 = 3.2925e-04
Loss = 4.9471e-03, PNorm = 147.6487, GNorm = 0.2434, lr_0 = 3.2903e-04
Loss = 5.9699e-03, PNorm = 147.6619, GNorm = 0.3123, lr_0 = 3.2880e-04
Loss = 4.7040e-03, PNorm = 147.6728, GNorm = 0.1417, lr_0 = 3.2858e-04
Loss = 4.7313e-03, PNorm = 147.6830, GNorm = 0.4931, lr_0 = 3.2835e-04
Loss = 7.0940e-03, PNorm = 147.6934, GNorm = 0.0931, lr_0 = 3.2813e-04
Loss = 6.0795e-03, PNorm = 147.7075, GNorm = 0.6403, lr_0 = 3.2790e-04
Loss = 6.6074e-03, PNorm = 147.7205, GNorm = 0.1322, lr_0 = 3.2768e-04
Loss = 4.2550e-03, PNorm = 147.7331, GNorm = 0.0620, lr_0 = 3.2745e-04
Loss = 5.9690e-03, PNorm = 147.7476, GNorm = 0.4336, lr_0 = 3.2723e-04
Loss = 4.6550e-03, PNorm = 147.7628, GNorm = 0.1846, lr_0 = 3.2700e-04
Loss = 4.0562e-03, PNorm = 147.7764, GNorm = 0.1662, lr_0 = 3.2678e-04
Loss = 4.2220e-03, PNorm = 147.7869, GNorm = 0.1273, lr_0 = 3.2656e-04
Loss = 5.1034e-03, PNorm = 147.7976, GNorm = 0.2994, lr_0 = 3.2633e-04
Loss = 4.4363e-03, PNorm = 147.8088, GNorm = 0.1441, lr_0 = 3.2611e-04
Loss = 5.2668e-03, PNorm = 147.8224, GNorm = 0.2132, lr_0 = 3.2589e-04
Loss = 5.9853e-03, PNorm = 147.8339, GNorm = 0.1724, lr_0 = 3.2566e-04
Loss = 4.0197e-03, PNorm = 147.8453, GNorm = 0.4290, lr_0 = 3.2544e-04
Loss = 5.9358e-03, PNorm = 147.8553, GNorm = 0.1268, lr_0 = 3.2522e-04
Loss = 5.8241e-03, PNorm = 147.8643, GNorm = 0.2503, lr_0 = 3.2499e-04
Loss = 4.8516e-03, PNorm = 147.8746, GNorm = 0.0792, lr_0 = 3.2477e-04
Loss = 5.5888e-03, PNorm = 147.8848, GNorm = 0.1218, lr_0 = 3.2455e-04
Loss = 6.3865e-03, PNorm = 147.8953, GNorm = 0.4461, lr_0 = 3.2433e-04
Loss = 4.4889e-03, PNorm = 147.9060, GNorm = 0.3019, lr_0 = 3.2410e-04
Loss = 4.9117e-03, PNorm = 147.9148, GNorm = 0.0846, lr_0 = 3.2388e-04
Loss = 4.4208e-03, PNorm = 147.9240, GNorm = 0.4621, lr_0 = 3.2366e-04
Loss = 4.7000e-03, PNorm = 147.9365, GNorm = 0.3540, lr_0 = 3.2344e-04
Loss = 6.2416e-03, PNorm = 147.9506, GNorm = 0.2171, lr_0 = 3.2322e-04
Loss = 4.8336e-03, PNorm = 147.9646, GNorm = 0.1995, lr_0 = 3.2300e-04
Loss = 4.8736e-03, PNorm = 147.9785, GNorm = 0.2651, lr_0 = 3.2277e-04
Loss = 5.9312e-03, PNorm = 147.9897, GNorm = 0.1225, lr_0 = 3.2255e-04
Loss = 4.5451e-03, PNorm = 148.0031, GNorm = 0.1901, lr_0 = 3.2233e-04
Loss = 5.6147e-03, PNorm = 148.0144, GNorm = 0.3623, lr_0 = 3.2211e-04
Loss = 6.6350e-03, PNorm = 148.0256, GNorm = 0.2078, lr_0 = 3.2189e-04
Loss = 5.7631e-03, PNorm = 148.0391, GNorm = 0.2164, lr_0 = 3.2167e-04
Loss = 4.2317e-03, PNorm = 148.0527, GNorm = 0.1963, lr_0 = 3.2145e-04
Loss = 3.9575e-03, PNorm = 148.0605, GNorm = 0.1432, lr_0 = 3.2123e-04
Loss = 4.7709e-03, PNorm = 148.0687, GNorm = 0.1546, lr_0 = 3.2101e-04
Loss = 4.9397e-03, PNorm = 148.0805, GNorm = 0.1605, lr_0 = 3.2079e-04
Loss = 7.6170e-03, PNorm = 148.0940, GNorm = 0.5120, lr_0 = 3.2057e-04
Loss = 4.1532e-03, PNorm = 148.1052, GNorm = 0.1148, lr_0 = 3.2035e-04
Loss = 6.1305e-03, PNorm = 148.1173, GNorm = 0.3450, lr_0 = 3.2013e-04
Loss = 5.3524e-03, PNorm = 148.1311, GNorm = 0.1602, lr_0 = 3.1991e-04
Loss = 6.0947e-03, PNorm = 148.1442, GNorm = 0.2184, lr_0 = 3.1969e-04
Loss = 4.7710e-03, PNorm = 148.1588, GNorm = 0.1541, lr_0 = 3.1947e-04
Loss = 6.0003e-03, PNorm = 148.1714, GNorm = 0.2082, lr_0 = 3.1925e-04
Loss = 4.7314e-03, PNorm = 148.1832, GNorm = 0.1201, lr_0 = 3.1904e-04
Loss = 6.8728e-03, PNorm = 148.1949, GNorm = 0.4601, lr_0 = 3.1882e-04
Loss = 4.9022e-03, PNorm = 148.2053, GNorm = 0.1266, lr_0 = 3.1860e-04
Loss = 4.3103e-03, PNorm = 148.2145, GNorm = 0.0951, lr_0 = 3.1838e-04
Loss = 6.2672e-03, PNorm = 148.2225, GNorm = 0.1714, lr_0 = 3.1816e-04
Loss = 5.9443e-03, PNorm = 148.2308, GNorm = 0.2177, lr_0 = 3.1794e-04
Loss = 6.1428e-03, PNorm = 148.2449, GNorm = 0.1076, lr_0 = 3.1773e-04
Loss = 5.3354e-03, PNorm = 148.2607, GNorm = 0.0819, lr_0 = 3.1751e-04
Loss = 5.1250e-03, PNorm = 148.2756, GNorm = 0.1600, lr_0 = 3.1729e-04
Loss = 6.2569e-03, PNorm = 148.2944, GNorm = 0.0756, lr_0 = 3.1707e-04
Loss = 5.2688e-03, PNorm = 148.3073, GNorm = 0.1641, lr_0 = 3.1686e-04
Loss = 6.5355e-03, PNorm = 148.3187, GNorm = 0.2548, lr_0 = 3.1664e-04
Loss = 5.7691e-03, PNorm = 148.3275, GNorm = 0.2149, lr_0 = 3.1642e-04
Loss = 7.9647e-03, PNorm = 148.3392, GNorm = 0.1655, lr_0 = 3.1621e-04
Validation mae = 0.479473
Epoch 16
Loss = 5.5033e-03, PNorm = 148.3474, GNorm = 0.1656, lr_0 = 3.1599e-04
Loss = 4.5090e-03, PNorm = 148.3552, GNorm = 0.2541, lr_0 = 3.1577e-04
Loss = 5.9493e-03, PNorm = 148.3622, GNorm = 0.0918, lr_0 = 3.1556e-04
Loss = 6.0079e-03, PNorm = 148.3694, GNorm = 0.1592, lr_0 = 3.1534e-04
Loss = 4.2542e-03, PNorm = 148.3761, GNorm = 0.2700, lr_0 = 3.1512e-04
Loss = 4.0954e-03, PNorm = 148.3843, GNorm = 0.1363, lr_0 = 3.1491e-04
Loss = 4.0751e-03, PNorm = 148.3939, GNorm = 0.0955, lr_0 = 3.1469e-04
Loss = 4.7199e-03, PNorm = 148.4049, GNorm = 0.1078, lr_0 = 3.1448e-04
Loss = 4.5753e-03, PNorm = 148.4151, GNorm = 0.0893, lr_0 = 3.1426e-04
Loss = 4.8015e-03, PNorm = 148.4260, GNorm = 0.1716, lr_0 = 3.1405e-04
Loss = 3.6273e-03, PNorm = 148.4402, GNorm = 0.2069, lr_0 = 3.1383e-04
Loss = 4.0044e-03, PNorm = 148.4504, GNorm = 0.1369, lr_0 = 3.1362e-04
Loss = 4.6778e-03, PNorm = 148.4605, GNorm = 0.1544, lr_0 = 3.1340e-04
Loss = 4.4413e-03, PNorm = 148.4677, GNorm = 0.1422, lr_0 = 3.1319e-04
Loss = 5.5688e-03, PNorm = 148.4737, GNorm = 0.2041, lr_0 = 3.1297e-04
Loss = 5.8781e-03, PNorm = 148.4847, GNorm = 0.1254, lr_0 = 3.1276e-04
Loss = 4.3513e-03, PNorm = 148.4964, GNorm = 0.1438, lr_0 = 3.1254e-04
Loss = 4.2568e-03, PNorm = 148.5052, GNorm = 0.2722, lr_0 = 3.1233e-04
Loss = 5.6537e-03, PNorm = 148.5131, GNorm = 0.2524, lr_0 = 3.1212e-04
Loss = 3.6198e-03, PNorm = 148.5197, GNorm = 0.1815, lr_0 = 3.1190e-04
Loss = 4.3878e-03, PNorm = 148.5312, GNorm = 0.1247, lr_0 = 3.1169e-04
Loss = 3.3474e-03, PNorm = 148.5423, GNorm = 0.1463, lr_0 = 3.1147e-04
Loss = 3.8127e-03, PNorm = 148.5504, GNorm = 0.1558, lr_0 = 3.1126e-04
Loss = 4.1038e-03, PNorm = 148.5578, GNorm = 0.1141, lr_0 = 3.1105e-04
Loss = 4.0780e-03, PNorm = 148.5658, GNorm = 0.1898, lr_0 = 3.1083e-04
Loss = 4.3234e-03, PNorm = 148.5762, GNorm = 0.1022, lr_0 = 3.1062e-04
Loss = 3.7772e-03, PNorm = 148.5852, GNorm = 0.1251, lr_0 = 3.1041e-04
Loss = 4.1763e-03, PNorm = 148.5952, GNorm = 0.1853, lr_0 = 3.1020e-04
Loss = 4.3975e-03, PNorm = 148.6071, GNorm = 0.1820, lr_0 = 3.0998e-04
Loss = 3.6079e-03, PNorm = 148.6180, GNorm = 0.2419, lr_0 = 3.0977e-04
Loss = 3.8970e-03, PNorm = 148.6282, GNorm = 0.6386, lr_0 = 3.0956e-04
Loss = 4.3909e-03, PNorm = 148.6367, GNorm = 0.0957, lr_0 = 3.0935e-04
Loss = 3.9376e-03, PNorm = 148.6448, GNorm = 0.0847, lr_0 = 3.0914e-04
Loss = 5.4086e-03, PNorm = 148.6535, GNorm = 0.4616, lr_0 = 3.0892e-04
Loss = 4.1097e-03, PNorm = 148.6643, GNorm = 0.1656, lr_0 = 3.0871e-04
Loss = 4.4100e-03, PNorm = 148.6770, GNorm = 0.0833, lr_0 = 3.0850e-04
Loss = 3.6417e-03, PNorm = 148.6903, GNorm = 0.2622, lr_0 = 3.0829e-04
Loss = 3.8818e-03, PNorm = 148.7010, GNorm = 0.1177, lr_0 = 3.0808e-04
Loss = 3.0572e-03, PNorm = 148.7087, GNorm = 0.1495, lr_0 = 3.0787e-04
Loss = 4.5986e-03, PNorm = 148.7156, GNorm = 0.2067, lr_0 = 3.0766e-04
Loss = 7.8730e-03, PNorm = 148.7261, GNorm = 0.1758, lr_0 = 3.0745e-04
Loss = 5.5465e-03, PNorm = 148.7355, GNorm = 0.1780, lr_0 = 3.0723e-04
Loss = 5.0352e-03, PNorm = 148.7449, GNorm = 0.2034, lr_0 = 3.0702e-04
Loss = 6.3933e-03, PNorm = 148.7500, GNorm = 0.1381, lr_0 = 3.0681e-04
Loss = 3.8263e-03, PNorm = 148.7615, GNorm = 0.2249, lr_0 = 3.0660e-04
Loss = 4.2664e-03, PNorm = 148.7717, GNorm = 0.1462, lr_0 = 3.0639e-04
Loss = 4.7218e-03, PNorm = 148.7795, GNorm = 0.0757, lr_0 = 3.0618e-04
Loss = 3.8204e-03, PNorm = 148.7852, GNorm = 0.2966, lr_0 = 3.0597e-04
Loss = 3.4131e-03, PNorm = 148.7955, GNorm = 0.1222, lr_0 = 3.0576e-04
Loss = 4.8802e-03, PNorm = 148.8033, GNorm = 0.1245, lr_0 = 3.0555e-04
Loss = 5.3353e-03, PNorm = 148.8150, GNorm = 0.1972, lr_0 = 3.0535e-04
Loss = 5.3277e-03, PNorm = 148.8291, GNorm = 0.1322, lr_0 = 3.0514e-04
Loss = 4.1218e-03, PNorm = 148.8408, GNorm = 0.3335, lr_0 = 3.0493e-04
Loss = 3.9430e-03, PNorm = 148.8520, GNorm = 0.1612, lr_0 = 3.0472e-04
Loss = 3.3626e-03, PNorm = 148.8585, GNorm = 0.0961, lr_0 = 3.0451e-04
Loss = 3.9787e-03, PNorm = 148.8700, GNorm = 0.1363, lr_0 = 3.0430e-04
Loss = 5.9814e-03, PNorm = 148.8770, GNorm = 0.1576, lr_0 = 3.0409e-04
Loss = 4.2126e-03, PNorm = 148.8860, GNorm = 0.1603, lr_0 = 3.0388e-04
Loss = 4.1665e-03, PNorm = 148.8987, GNorm = 0.2710, lr_0 = 3.0368e-04
Loss = 6.2414e-03, PNorm = 148.9081, GNorm = 0.1503, lr_0 = 3.0347e-04
Loss = 3.5854e-03, PNorm = 148.9170, GNorm = 0.1389, lr_0 = 3.0326e-04
Loss = 3.6764e-03, PNorm = 148.9263, GNorm = 0.0917, lr_0 = 3.0305e-04
Loss = 4.6398e-03, PNorm = 148.9318, GNorm = 0.1782, lr_0 = 3.0284e-04
Loss = 3.9942e-03, PNorm = 148.9420, GNorm = 0.1188, lr_0 = 3.0264e-04
Loss = 6.9914e-03, PNorm = 148.9550, GNorm = 0.2356, lr_0 = 3.0243e-04
Loss = 4.7774e-03, PNorm = 148.9678, GNorm = 0.3119, lr_0 = 3.0222e-04
Loss = 5.1866e-03, PNorm = 148.9796, GNorm = 0.1207, lr_0 = 3.0202e-04
Loss = 3.1892e-03, PNorm = 148.9920, GNorm = 0.1617, lr_0 = 3.0181e-04
Loss = 3.4739e-03, PNorm = 149.0008, GNorm = 0.1924, lr_0 = 3.0160e-04
Loss = 4.0517e-03, PNorm = 149.0090, GNorm = 0.0655, lr_0 = 3.0140e-04
Loss = 4.0086e-03, PNorm = 149.0181, GNorm = 0.1622, lr_0 = 3.0119e-04
Loss = 4.8378e-03, PNorm = 149.0275, GNorm = 0.1427, lr_0 = 3.0098e-04
Loss = 4.8936e-03, PNorm = 149.0391, GNorm = 0.1453, lr_0 = 3.0078e-04
Loss = 4.0440e-03, PNorm = 149.0502, GNorm = 0.0695, lr_0 = 3.0057e-04
Loss = 3.9219e-03, PNorm = 149.0595, GNorm = 0.0768, lr_0 = 3.0036e-04
Loss = 7.5333e-03, PNorm = 149.0647, GNorm = 0.1237, lr_0 = 3.0016e-04
Loss = 5.7165e-03, PNorm = 149.0739, GNorm = 0.1882, lr_0 = 2.9995e-04
Loss = 3.8154e-03, PNorm = 149.0791, GNorm = 0.0649, lr_0 = 2.9975e-04
Loss = 5.7580e-03, PNorm = 149.0876, GNorm = 0.1366, lr_0 = 2.9954e-04
Loss = 3.9387e-03, PNorm = 149.0960, GNorm = 0.2029, lr_0 = 2.9934e-04
Loss = 4.6583e-03, PNorm = 149.1064, GNorm = 0.0952, lr_0 = 2.9913e-04
Loss = 3.1603e-03, PNorm = 149.1183, GNorm = 0.0887, lr_0 = 2.9893e-04
Loss = 3.7648e-03, PNorm = 149.1323, GNorm = 0.1668, lr_0 = 2.9872e-04
Loss = 3.6101e-03, PNorm = 149.1453, GNorm = 0.2234, lr_0 = 2.9852e-04
Loss = 7.4644e-03, PNorm = 149.1573, GNorm = 0.1069, lr_0 = 2.9831e-04
Loss = 5.2103e-03, PNorm = 149.1642, GNorm = 0.3044, lr_0 = 2.9811e-04
Loss = 5.9519e-03, PNorm = 149.1747, GNorm = 0.2719, lr_0 = 2.9790e-04
Loss = 6.3136e-03, PNorm = 149.1832, GNorm = 0.2051, lr_0 = 2.9770e-04
Loss = 5.7470e-03, PNorm = 149.1960, GNorm = 0.2072, lr_0 = 2.9750e-04
Loss = 3.8317e-03, PNorm = 149.2081, GNorm = 0.3040, lr_0 = 2.9729e-04
Loss = 4.9104e-03, PNorm = 149.2189, GNorm = 0.4119, lr_0 = 2.9709e-04
Loss = 3.7264e-03, PNorm = 149.2298, GNorm = 0.2234, lr_0 = 2.9689e-04
Loss = 3.7430e-03, PNorm = 149.2432, GNorm = 0.1640, lr_0 = 2.9668e-04
Loss = 4.2270e-03, PNorm = 149.2524, GNorm = 0.1716, lr_0 = 2.9648e-04
Loss = 5.0898e-03, PNorm = 149.2612, GNorm = 0.3161, lr_0 = 2.9628e-04
Loss = 4.0326e-03, PNorm = 149.2693, GNorm = 0.0913, lr_0 = 2.9607e-04
Loss = 4.3329e-03, PNorm = 149.2795, GNorm = 0.1832, lr_0 = 2.9587e-04
Loss = 3.8326e-03, PNorm = 149.2906, GNorm = 0.2058, lr_0 = 2.9567e-04
Loss = 4.1889e-03, PNorm = 149.3000, GNorm = 0.0956, lr_0 = 2.9546e-04
Loss = 5.0058e-03, PNorm = 149.3088, GNorm = 0.2657, lr_0 = 2.9526e-04
Loss = 5.5097e-03, PNorm = 149.3190, GNorm = 0.0798, lr_0 = 2.9506e-04
Loss = 5.2559e-03, PNorm = 149.3325, GNorm = 0.1422, lr_0 = 2.9486e-04
Loss = 5.6517e-03, PNorm = 149.3399, GNorm = 0.1660, lr_0 = 2.9466e-04
Loss = 3.8434e-03, PNorm = 149.3502, GNorm = 0.1112, lr_0 = 2.9445e-04
Loss = 6.3644e-03, PNorm = 149.3620, GNorm = 0.1659, lr_0 = 2.9425e-04
Loss = 4.7795e-03, PNorm = 149.3732, GNorm = 0.2811, lr_0 = 2.9405e-04
Loss = 5.8946e-03, PNorm = 149.3840, GNorm = 0.1416, lr_0 = 2.9385e-04
Loss = 5.1912e-03, PNorm = 149.3957, GNorm = 0.1858, lr_0 = 2.9365e-04
Loss = 4.7161e-03, PNorm = 149.4058, GNorm = 0.1417, lr_0 = 2.9345e-04
Loss = 4.7741e-03, PNorm = 149.4202, GNorm = 0.4009, lr_0 = 2.9325e-04
Loss = 3.9254e-03, PNorm = 149.4297, GNorm = 0.2245, lr_0 = 2.9305e-04
Loss = 5.0572e-03, PNorm = 149.4387, GNorm = 0.1717, lr_0 = 2.9284e-04
Loss = 6.5870e-03, PNorm = 149.4503, GNorm = 0.2796, lr_0 = 2.9264e-04
Loss = 3.7527e-03, PNorm = 149.4594, GNorm = 0.2425, lr_0 = 2.9244e-04
Loss = 4.5806e-03, PNorm = 149.4648, GNorm = 0.1574, lr_0 = 2.9224e-04
Loss = 3.9621e-03, PNorm = 149.4780, GNorm = 0.4314, lr_0 = 2.9204e-04
Loss = 6.7961e-03, PNorm = 149.4901, GNorm = 0.1414, lr_0 = 2.9184e-04
Loss = 5.1787e-03, PNorm = 149.5017, GNorm = 0.2741, lr_0 = 2.9164e-04
Loss = 4.1902e-03, PNorm = 149.5097, GNorm = 0.1288, lr_0 = 2.9144e-04
Loss = 5.5940e-03, PNorm = 149.5182, GNorm = 0.3571, lr_0 = 2.9124e-04
Validation mae = 0.479431
Epoch 17
Loss = 3.2415e-03, PNorm = 149.5250, GNorm = 0.2058, lr_0 = 2.9104e-04
Loss = 3.8733e-03, PNorm = 149.5321, GNorm = 0.1862, lr_0 = 2.9084e-04
Loss = 4.4451e-03, PNorm = 149.5365, GNorm = 0.2863, lr_0 = 2.9065e-04
Loss = 4.3102e-03, PNorm = 149.5450, GNorm = 0.1894, lr_0 = 2.9045e-04
Loss = 4.1240e-03, PNorm = 149.5510, GNorm = 0.0791, lr_0 = 2.9025e-04
Loss = 3.2773e-03, PNorm = 149.5597, GNorm = 0.1265, lr_0 = 2.9005e-04
Loss = 3.3760e-03, PNorm = 149.5698, GNorm = 0.1399, lr_0 = 2.8985e-04
Loss = 6.2413e-03, PNorm = 149.5759, GNorm = 0.3379, lr_0 = 2.8965e-04
Loss = 3.5422e-03, PNorm = 149.5802, GNorm = 0.1967, lr_0 = 2.8945e-04
Loss = 5.2055e-03, PNorm = 149.5889, GNorm = 0.3446, lr_0 = 2.8925e-04
Loss = 2.9593e-03, PNorm = 149.5973, GNorm = 0.0896, lr_0 = 2.8906e-04
Loss = 3.8755e-03, PNorm = 149.6065, GNorm = 0.1122, lr_0 = 2.8886e-04
Loss = 3.3442e-03, PNorm = 149.6144, GNorm = 0.1025, lr_0 = 2.8866e-04
Loss = 4.0254e-03, PNorm = 149.6229, GNorm = 0.0740, lr_0 = 2.8846e-04
Loss = 5.3886e-03, PNorm = 149.6313, GNorm = 0.2359, lr_0 = 2.8826e-04
Loss = 4.5492e-03, PNorm = 149.6369, GNorm = 0.1470, lr_0 = 2.8807e-04
Loss = 4.8792e-03, PNorm = 149.6416, GNorm = 0.1639, lr_0 = 2.8787e-04
Loss = 4.1823e-03, PNorm = 149.6481, GNorm = 0.3052, lr_0 = 2.8767e-04
Loss = 3.7793e-03, PNorm = 149.6561, GNorm = 0.1832, lr_0 = 2.8748e-04
Loss = 3.6092e-03, PNorm = 149.6654, GNorm = 0.1285, lr_0 = 2.8728e-04
Loss = 3.4388e-03, PNorm = 149.6716, GNorm = 0.1113, lr_0 = 2.8708e-04
Loss = 4.5794e-03, PNorm = 149.6753, GNorm = 0.1688, lr_0 = 2.8689e-04
Loss = 3.6406e-03, PNorm = 149.6850, GNorm = 0.2125, lr_0 = 2.8669e-04
Loss = 5.1200e-03, PNorm = 149.6937, GNorm = 0.1679, lr_0 = 2.8649e-04
Loss = 3.5293e-03, PNorm = 149.6990, GNorm = 0.0882, lr_0 = 2.8630e-04
Loss = 4.3324e-03, PNorm = 149.7056, GNorm = 0.3043, lr_0 = 2.8610e-04
Loss = 3.6340e-03, PNorm = 149.7122, GNorm = 0.1848, lr_0 = 2.8590e-04
Loss = 5.6864e-03, PNorm = 149.7196, GNorm = 0.1915, lr_0 = 2.8571e-04
Loss = 5.9261e-03, PNorm = 149.7277, GNorm = 0.1332, lr_0 = 2.8551e-04
Loss = 3.5591e-03, PNorm = 149.7323, GNorm = 0.0966, lr_0 = 2.8532e-04
Loss = 3.2141e-03, PNorm = 149.7390, GNorm = 0.0728, lr_0 = 2.8512e-04
Loss = 4.5299e-03, PNorm = 149.7489, GNorm = 0.1963, lr_0 = 2.8493e-04
Loss = 3.1687e-03, PNorm = 149.7581, GNorm = 0.0978, lr_0 = 2.8473e-04
Loss = 3.5238e-03, PNorm = 149.7647, GNorm = 0.3644, lr_0 = 2.8454e-04
Loss = 3.2449e-03, PNorm = 149.7698, GNorm = 0.1282, lr_0 = 2.8434e-04
Loss = 3.2791e-03, PNorm = 149.7747, GNorm = 0.3862, lr_0 = 2.8415e-04
Loss = 3.8888e-03, PNorm = 149.7811, GNorm = 0.1839, lr_0 = 2.8395e-04
Loss = 4.3785e-03, PNorm = 149.7894, GNorm = 0.2699, lr_0 = 2.8376e-04
Loss = 3.3224e-03, PNorm = 149.7957, GNorm = 0.2399, lr_0 = 2.8356e-04
Loss = 4.7307e-03, PNorm = 149.8063, GNorm = 0.2766, lr_0 = 2.8337e-04
Loss = 5.2511e-03, PNorm = 149.8169, GNorm = 0.2115, lr_0 = 2.8317e-04
Loss = 3.2267e-03, PNorm = 149.8263, GNorm = 0.0678, lr_0 = 2.8298e-04
Loss = 3.1382e-03, PNorm = 149.8367, GNorm = 0.1246, lr_0 = 2.8279e-04
Loss = 2.8612e-03, PNorm = 149.8457, GNorm = 0.1764, lr_0 = 2.8259e-04
Loss = 3.3099e-03, PNorm = 149.8535, GNorm = 0.1704, lr_0 = 2.8240e-04
Loss = 3.6639e-03, PNorm = 149.8579, GNorm = 0.2126, lr_0 = 2.8221e-04
Loss = 4.0392e-03, PNorm = 149.8676, GNorm = 0.3304, lr_0 = 2.8201e-04
Loss = 4.0853e-03, PNorm = 149.8779, GNorm = 0.3312, lr_0 = 2.8182e-04
Loss = 3.3897e-03, PNorm = 149.8876, GNorm = 0.2341, lr_0 = 2.8163e-04
Loss = 3.6677e-03, PNorm = 149.8976, GNorm = 0.1438, lr_0 = 2.8143e-04
Loss = 2.8761e-03, PNorm = 149.9062, GNorm = 0.2192, lr_0 = 2.8124e-04
Loss = 3.2481e-03, PNorm = 149.9142, GNorm = 0.1181, lr_0 = 2.8105e-04
Loss = 3.7022e-03, PNorm = 149.9198, GNorm = 0.0970, lr_0 = 2.8085e-04
Loss = 3.9139e-03, PNorm = 149.9279, GNorm = 0.1353, lr_0 = 2.8066e-04
Loss = 4.2721e-03, PNorm = 149.9370, GNorm = 0.2712, lr_0 = 2.8047e-04
Loss = 4.6386e-03, PNorm = 149.9443, GNorm = 0.3559, lr_0 = 2.8028e-04
Loss = 5.1496e-03, PNorm = 149.9535, GNorm = 0.1186, lr_0 = 2.8009e-04
Loss = 3.3162e-03, PNorm = 149.9609, GNorm = 0.1183, lr_0 = 2.7989e-04
Loss = 3.1852e-03, PNorm = 149.9661, GNorm = 0.1456, lr_0 = 2.7970e-04
Loss = 2.9520e-03, PNorm = 149.9730, GNorm = 0.1345, lr_0 = 2.7951e-04
Loss = 3.0034e-03, PNorm = 149.9802, GNorm = 0.1776, lr_0 = 2.7932e-04
Loss = 4.0256e-03, PNorm = 149.9879, GNorm = 0.3436, lr_0 = 2.7913e-04
Loss = 4.2866e-03, PNorm = 149.9986, GNorm = 0.2624, lr_0 = 2.7894e-04
Loss = 3.3241e-03, PNorm = 150.0087, GNorm = 0.2125, lr_0 = 2.7875e-04
Loss = 4.2546e-03, PNorm = 150.0165, GNorm = 0.2039, lr_0 = 2.7855e-04
Loss = 4.4236e-03, PNorm = 150.0235, GNorm = 0.0879, lr_0 = 2.7836e-04
Loss = 3.6313e-03, PNorm = 150.0347, GNorm = 0.1401, lr_0 = 2.7817e-04
Loss = 6.3680e-03, PNorm = 150.0471, GNorm = 0.1855, lr_0 = 2.7798e-04
Loss = 4.1712e-03, PNorm = 150.0589, GNorm = 0.1253, lr_0 = 2.7779e-04
Loss = 3.3517e-03, PNorm = 150.0675, GNorm = 0.1080, lr_0 = 2.7760e-04
Loss = 3.6579e-03, PNorm = 150.0727, GNorm = 0.1013, lr_0 = 2.7741e-04
Loss = 4.4068e-03, PNorm = 150.0788, GNorm = 0.2110, lr_0 = 2.7722e-04
Loss = 5.3035e-03, PNorm = 150.0819, GNorm = 0.2362, lr_0 = 2.7703e-04
Loss = 3.7540e-03, PNorm = 150.0863, GNorm = 0.1355, lr_0 = 2.7684e-04
Loss = 3.9472e-03, PNorm = 150.0932, GNorm = 0.2776, lr_0 = 2.7665e-04
Loss = 4.1803e-03, PNorm = 150.1048, GNorm = 0.1012, lr_0 = 2.7646e-04
Loss = 5.1898e-03, PNorm = 150.1153, GNorm = 0.2580, lr_0 = 2.7627e-04
Loss = 3.4687e-03, PNorm = 150.1243, GNorm = 0.1993, lr_0 = 2.7608e-04
Loss = 4.1364e-03, PNorm = 150.1329, GNorm = 0.1189, lr_0 = 2.7590e-04
Loss = 4.6753e-03, PNorm = 150.1383, GNorm = 0.2018, lr_0 = 2.7571e-04
Loss = 3.4436e-03, PNorm = 150.1482, GNorm = 0.1613, lr_0 = 2.7552e-04
Loss = 6.3442e-03, PNorm = 150.1618, GNorm = 0.2737, lr_0 = 2.7533e-04
Loss = 3.7032e-03, PNorm = 150.1724, GNorm = 0.1886, lr_0 = 2.7514e-04
Loss = 3.4489e-03, PNorm = 150.1814, GNorm = 0.3876, lr_0 = 2.7495e-04
Loss = 4.6641e-03, PNorm = 150.1904, GNorm = 0.4584, lr_0 = 2.7476e-04
Loss = 2.6446e-03, PNorm = 150.1997, GNorm = 0.0811, lr_0 = 2.7457e-04
Loss = 4.3137e-03, PNorm = 150.2061, GNorm = 0.1341, lr_0 = 2.7439e-04
Loss = 4.2532e-03, PNorm = 150.2128, GNorm = 0.3031, lr_0 = 2.7420e-04
Loss = 4.9165e-03, PNorm = 150.2210, GNorm = 0.1270, lr_0 = 2.7401e-04
Loss = 3.7998e-03, PNorm = 150.2312, GNorm = 0.0838, lr_0 = 2.7382e-04
Loss = 3.6628e-03, PNorm = 150.2419, GNorm = 0.3999, lr_0 = 2.7364e-04
Loss = 3.3653e-03, PNorm = 150.2531, GNorm = 0.2717, lr_0 = 2.7345e-04
Loss = 6.0819e-03, PNorm = 150.2633, GNorm = 0.1086, lr_0 = 2.7326e-04
Loss = 3.2284e-03, PNorm = 150.2721, GNorm = 0.1251, lr_0 = 2.7307e-04
Loss = 2.2556e-03, PNorm = 150.2820, GNorm = 0.1187, lr_0 = 2.7289e-04
Loss = 3.5713e-03, PNorm = 150.2882, GNorm = 0.3419, lr_0 = 2.7270e-04
Loss = 3.8884e-03, PNorm = 150.2934, GNorm = 0.1917, lr_0 = 2.7251e-04
Loss = 3.0015e-03, PNorm = 150.2987, GNorm = 0.0762, lr_0 = 2.7233e-04
Loss = 2.8529e-03, PNorm = 150.3058, GNorm = 0.1016, lr_0 = 2.7214e-04
Loss = 3.6940e-03, PNorm = 150.3163, GNorm = 0.2853, lr_0 = 2.7195e-04
Loss = 3.1052e-03, PNorm = 150.3236, GNorm = 0.1576, lr_0 = 2.7177e-04
Loss = 3.9060e-03, PNorm = 150.3301, GNorm = 0.3264, lr_0 = 2.7158e-04
Loss = 4.2742e-03, PNorm = 150.3362, GNorm = 0.2555, lr_0 = 2.7139e-04
Loss = 3.3307e-03, PNorm = 150.3462, GNorm = 0.2222, lr_0 = 2.7121e-04
Loss = 5.5723e-03, PNorm = 150.3545, GNorm = 0.1012, lr_0 = 2.7102e-04
Loss = 3.5131e-03, PNorm = 150.3618, GNorm = 0.0977, lr_0 = 2.7084e-04
Loss = 3.4606e-03, PNorm = 150.3683, GNorm = 0.0584, lr_0 = 2.7065e-04
Loss = 4.0563e-03, PNorm = 150.3747, GNorm = 0.1768, lr_0 = 2.7047e-04
Loss = 5.8449e-03, PNorm = 150.3815, GNorm = 0.1083, lr_0 = 2.7028e-04
Loss = 4.8125e-03, PNorm = 150.3884, GNorm = 0.1352, lr_0 = 2.7010e-04
Loss = 7.0384e-03, PNorm = 150.3984, GNorm = 0.1854, lr_0 = 2.6991e-04
Loss = 3.1885e-03, PNorm = 150.4090, GNorm = 0.1854, lr_0 = 2.6973e-04
Loss = 3.2353e-03, PNorm = 150.4175, GNorm = 0.1513, lr_0 = 2.6954e-04
Loss = 3.5406e-03, PNorm = 150.4264, GNorm = 0.1206, lr_0 = 2.6936e-04
Loss = 4.8666e-03, PNorm = 150.4335, GNorm = 0.1315, lr_0 = 2.6917e-04
Loss = 3.3029e-03, PNorm = 150.4430, GNorm = 0.1073, lr_0 = 2.6899e-04
Loss = 4.0761e-03, PNorm = 150.4499, GNorm = 0.3773, lr_0 = 2.6880e-04
Loss = 3.0545e-03, PNorm = 150.4601, GNorm = 0.3436, lr_0 = 2.6862e-04
Loss = 3.0879e-03, PNorm = 150.4698, GNorm = 0.0925, lr_0 = 2.6844e-04
Loss = 3.2815e-03, PNorm = 150.4784, GNorm = 0.0602, lr_0 = 2.6825e-04
Validation mae = 0.477903
Epoch 18
Loss = 3.8577e-03, PNorm = 150.4833, GNorm = 0.2592, lr_0 = 2.6807e-04
Loss = 3.4743e-03, PNorm = 150.4902, GNorm = 0.1807, lr_0 = 2.6788e-04
Loss = 3.3501e-03, PNorm = 150.4967, GNorm = 0.1147, lr_0 = 2.6770e-04
Loss = 4.7604e-03, PNorm = 150.5030, GNorm = 0.1583, lr_0 = 2.6752e-04
Loss = 2.9772e-03, PNorm = 150.5064, GNorm = 0.1559, lr_0 = 2.6733e-04
Loss = 2.4440e-03, PNorm = 150.5130, GNorm = 0.0877, lr_0 = 2.6715e-04
Loss = 3.5568e-03, PNorm = 150.5162, GNorm = 0.1051, lr_0 = 2.6697e-04
Loss = 2.6026e-03, PNorm = 150.5213, GNorm = 0.0593, lr_0 = 2.6678e-04
Loss = 2.1915e-03, PNorm = 150.5304, GNorm = 0.1864, lr_0 = 2.6660e-04
Loss = 3.0508e-03, PNorm = 150.5367, GNorm = 0.2381, lr_0 = 2.6642e-04
Loss = 2.7947e-03, PNorm = 150.5429, GNorm = 0.0701, lr_0 = 2.6624e-04
Loss = 3.5022e-03, PNorm = 150.5507, GNorm = 0.2670, lr_0 = 2.6605e-04
Loss = 3.0387e-03, PNorm = 150.5572, GNorm = 0.1496, lr_0 = 2.6587e-04
Loss = 2.8348e-03, PNorm = 150.5604, GNorm = 0.1686, lr_0 = 2.6569e-04
Loss = 4.7819e-03, PNorm = 150.5635, GNorm = 0.3292, lr_0 = 2.6551e-04
Loss = 3.6597e-03, PNorm = 150.5688, GNorm = 0.2005, lr_0 = 2.6533e-04
Loss = 3.1271e-03, PNorm = 150.5753, GNorm = 0.0605, lr_0 = 2.6514e-04
Loss = 3.0010e-03, PNorm = 150.5844, GNorm = 0.1816, lr_0 = 2.6496e-04
Loss = 3.2562e-03, PNorm = 150.5916, GNorm = 0.1280, lr_0 = 2.6478e-04
Loss = 4.9812e-03, PNorm = 150.5995, GNorm = 0.3399, lr_0 = 2.6460e-04
Loss = 2.8455e-03, PNorm = 150.6039, GNorm = 0.1182, lr_0 = 2.6442e-04
Loss = 3.3499e-03, PNorm = 150.6085, GNorm = 0.1705, lr_0 = 2.6424e-04
Loss = 4.1522e-03, PNorm = 150.6121, GNorm = 0.2518, lr_0 = 2.6406e-04
Loss = 2.4849e-03, PNorm = 150.6177, GNorm = 0.0644, lr_0 = 2.6388e-04
Loss = 3.6477e-03, PNorm = 150.6247, GNorm = 0.2961, lr_0 = 2.6369e-04
Loss = 2.7685e-03, PNorm = 150.6310, GNorm = 0.0872, lr_0 = 2.6351e-04
Loss = 2.6247e-03, PNorm = 150.6383, GNorm = 0.1853, lr_0 = 2.6333e-04
Loss = 3.1442e-03, PNorm = 150.6448, GNorm = 0.1036, lr_0 = 2.6315e-04
Loss = 2.5790e-03, PNorm = 150.6492, GNorm = 0.0445, lr_0 = 2.6297e-04
Loss = 3.2688e-03, PNorm = 150.6540, GNorm = 0.0812, lr_0 = 2.6279e-04
Loss = 5.2274e-03, PNorm = 150.6611, GNorm = 0.0690, lr_0 = 2.6261e-04
Loss = 4.7001e-03, PNorm = 150.6713, GNorm = 0.2304, lr_0 = 2.6243e-04
Loss = 2.9161e-03, PNorm = 150.6769, GNorm = 0.0453, lr_0 = 2.6225e-04
Loss = 6.2451e-03, PNorm = 150.6799, GNorm = 0.3770, lr_0 = 2.6207e-04
Loss = 2.6606e-03, PNorm = 150.6842, GNorm = 0.1454, lr_0 = 2.6189e-04
Loss = 4.3381e-03, PNorm = 150.6904, GNorm = 0.1564, lr_0 = 2.6171e-04
Loss = 2.7528e-03, PNorm = 150.6994, GNorm = 0.2828, lr_0 = 2.6153e-04
Loss = 3.0805e-03, PNorm = 150.7043, GNorm = 0.2476, lr_0 = 2.6136e-04
Loss = 2.9305e-03, PNorm = 150.7113, GNorm = 0.2094, lr_0 = 2.6118e-04
Loss = 3.0940e-03, PNorm = 150.7172, GNorm = 0.1844, lr_0 = 2.6100e-04
Loss = 3.3917e-03, PNorm = 150.7236, GNorm = 0.1123, lr_0 = 2.6082e-04
Loss = 3.2371e-03, PNorm = 150.7317, GNorm = 0.4906, lr_0 = 2.6064e-04
Loss = 3.3921e-03, PNorm = 150.7389, GNorm = 0.1851, lr_0 = 2.6046e-04
Loss = 3.2638e-03, PNorm = 150.7475, GNorm = 0.1919, lr_0 = 2.6028e-04
Loss = 2.6653e-03, PNorm = 150.7541, GNorm = 0.1757, lr_0 = 2.6011e-04
Loss = 6.2447e-03, PNorm = 150.7586, GNorm = 0.0805, lr_0 = 2.5993e-04
Loss = 2.9868e-03, PNorm = 150.7648, GNorm = 0.3155, lr_0 = 2.5975e-04
Loss = 3.4958e-03, PNorm = 150.7723, GNorm = 0.2270, lr_0 = 2.5957e-04
Loss = 4.1295e-03, PNorm = 150.7783, GNorm = 0.0886, lr_0 = 2.5939e-04
Loss = 3.2001e-03, PNorm = 150.7845, GNorm = 0.2235, lr_0 = 2.5922e-04
Loss = 3.0554e-03, PNorm = 150.7888, GNorm = 0.2329, lr_0 = 2.5904e-04
Loss = 3.1721e-03, PNorm = 150.7959, GNorm = 0.2099, lr_0 = 2.5886e-04
Loss = 2.8847e-03, PNorm = 150.8008, GNorm = 0.1029, lr_0 = 2.5868e-04
Loss = 3.4679e-03, PNorm = 150.8079, GNorm = 0.2418, lr_0 = 2.5851e-04
Loss = 2.4580e-03, PNorm = 150.8152, GNorm = 0.0701, lr_0 = 2.5833e-04
Loss = 3.8949e-03, PNorm = 150.8194, GNorm = 0.0594, lr_0 = 2.5815e-04
Loss = 2.6343e-03, PNorm = 150.8254, GNorm = 0.1209, lr_0 = 2.5797e-04
Loss = 3.4464e-03, PNorm = 150.8279, GNorm = 0.1445, lr_0 = 2.5780e-04
Loss = 2.6442e-03, PNorm = 150.8357, GNorm = 0.2226, lr_0 = 2.5762e-04
Loss = 3.0516e-03, PNorm = 150.8417, GNorm = 0.2436, lr_0 = 2.5745e-04
Loss = 4.1627e-03, PNorm = 150.8498, GNorm = 0.2477, lr_0 = 2.5727e-04
Loss = 4.5528e-03, PNorm = 150.8603, GNorm = 0.1244, lr_0 = 2.5709e-04
Loss = 2.9793e-03, PNorm = 150.8683, GNorm = 0.1901, lr_0 = 2.5692e-04
Loss = 3.8126e-03, PNorm = 150.8755, GNorm = 0.1798, lr_0 = 2.5674e-04
Loss = 3.1873e-03, PNorm = 150.8842, GNorm = 0.0856, lr_0 = 2.5656e-04
Loss = 2.5179e-03, PNorm = 150.8926, GNorm = 0.1787, lr_0 = 2.5639e-04
Loss = 4.4033e-03, PNorm = 150.9006, GNorm = 0.2141, lr_0 = 2.5621e-04
Loss = 2.8178e-03, PNorm = 150.9086, GNorm = 0.0591, lr_0 = 2.5604e-04
Loss = 3.3284e-03, PNorm = 150.9181, GNorm = 0.0961, lr_0 = 2.5586e-04
Loss = 5.8975e-03, PNorm = 150.9227, GNorm = 0.3352, lr_0 = 2.5569e-04
Loss = 3.1424e-03, PNorm = 150.9284, GNorm = 0.0577, lr_0 = 2.5551e-04
Loss = 4.0649e-03, PNorm = 150.9358, GNorm = 0.0749, lr_0 = 2.5534e-04
Loss = 3.7379e-03, PNorm = 150.9454, GNorm = 0.1361, lr_0 = 2.5516e-04
Loss = 2.7716e-03, PNorm = 150.9556, GNorm = 0.1259, lr_0 = 2.5499e-04
Loss = 3.3652e-03, PNorm = 150.9651, GNorm = 0.0533, lr_0 = 2.5481e-04
Loss = 2.9006e-03, PNorm = 150.9720, GNorm = 0.0914, lr_0 = 2.5464e-04
Loss = 4.4315e-03, PNorm = 150.9797, GNorm = 0.1106, lr_0 = 2.5446e-04
Loss = 3.4878e-03, PNorm = 150.9887, GNorm = 0.0792, lr_0 = 2.5429e-04
Loss = 3.0710e-03, PNorm = 150.9956, GNorm = 0.0506, lr_0 = 2.5411e-04
Loss = 4.8126e-03, PNorm = 151.0018, GNorm = 0.3293, lr_0 = 2.5394e-04
Loss = 5.1023e-03, PNorm = 151.0057, GNorm = 0.0606, lr_0 = 2.5377e-04
Loss = 3.0490e-03, PNorm = 151.0135, GNorm = 0.2774, lr_0 = 2.5359e-04
Loss = 3.9148e-03, PNorm = 151.0200, GNorm = 0.1278, lr_0 = 2.5342e-04
Loss = 3.4300e-03, PNorm = 151.0249, GNorm = 0.1232, lr_0 = 2.5325e-04
Loss = 6.4552e-03, PNorm = 151.0344, GNorm = 0.3340, lr_0 = 2.5307e-04
Loss = 2.9130e-03, PNorm = 151.0420, GNorm = 0.2237, lr_0 = 2.5290e-04
Loss = 3.3440e-03, PNorm = 151.0521, GNorm = 0.1997, lr_0 = 2.5273e-04
Loss = 3.3563e-03, PNorm = 151.0608, GNorm = 0.2529, lr_0 = 2.5255e-04
Loss = 3.3883e-03, PNorm = 151.0673, GNorm = 0.2351, lr_0 = 2.5238e-04
Loss = 2.9739e-03, PNorm = 151.0739, GNorm = 0.0920, lr_0 = 2.5221e-04
Loss = 4.3169e-03, PNorm = 151.0830, GNorm = 0.2416, lr_0 = 2.5203e-04
Loss = 3.2177e-03, PNorm = 151.0946, GNorm = 0.2684, lr_0 = 2.5186e-04
Loss = 2.6555e-03, PNorm = 151.1049, GNorm = 0.2167, lr_0 = 2.5169e-04
Loss = 3.1269e-03, PNorm = 151.1160, GNorm = 0.2196, lr_0 = 2.5152e-04
Loss = 3.4856e-03, PNorm = 151.1246, GNorm = 0.1727, lr_0 = 2.5134e-04
Loss = 3.1045e-03, PNorm = 151.1322, GNorm = 0.3526, lr_0 = 2.5117e-04
Loss = 2.5129e-03, PNorm = 151.1387, GNorm = 0.1956, lr_0 = 2.5100e-04
Loss = 8.1249e-03, PNorm = 151.1444, GNorm = 0.2227, lr_0 = 2.5083e-04
Loss = 4.2653e-03, PNorm = 151.1510, GNorm = 0.2217, lr_0 = 2.5066e-04
Loss = 3.9419e-03, PNorm = 151.1612, GNorm = 0.3466, lr_0 = 2.5048e-04
Loss = 4.0732e-03, PNorm = 151.1668, GNorm = 0.2177, lr_0 = 2.5031e-04
Loss = 2.8160e-03, PNorm = 151.1759, GNorm = 0.2103, lr_0 = 2.5014e-04
Loss = 4.5316e-03, PNorm = 151.1845, GNorm = 0.1780, lr_0 = 2.4997e-04
Loss = 4.1321e-03, PNorm = 151.1937, GNorm = 0.1162, lr_0 = 2.4980e-04
Loss = 3.3270e-03, PNorm = 151.2013, GNorm = 0.1434, lr_0 = 2.4963e-04
Loss = 2.8187e-03, PNorm = 151.2074, GNorm = 0.2683, lr_0 = 2.4946e-04
Loss = 3.6659e-03, PNorm = 151.2124, GNorm = 0.1986, lr_0 = 2.4929e-04
Loss = 3.1596e-03, PNorm = 151.2181, GNorm = 0.0735, lr_0 = 2.4911e-04
Loss = 3.1388e-03, PNorm = 151.2249, GNorm = 0.1886, lr_0 = 2.4894e-04
Loss = 3.1259e-03, PNorm = 151.2322, GNorm = 0.1264, lr_0 = 2.4877e-04
Loss = 3.3585e-03, PNorm = 151.2393, GNorm = 0.1106, lr_0 = 2.4860e-04
Loss = 3.6606e-03, PNorm = 151.2467, GNorm = 0.0821, lr_0 = 2.4843e-04
Loss = 2.7035e-03, PNorm = 151.2549, GNorm = 0.1507, lr_0 = 2.4826e-04
Loss = 2.0941e-03, PNorm = 151.2597, GNorm = 0.0998, lr_0 = 2.4809e-04
Loss = 3.0525e-03, PNorm = 151.2654, GNorm = 0.3143, lr_0 = 2.4792e-04
Loss = 2.7465e-03, PNorm = 151.2694, GNorm = 0.3014, lr_0 = 2.4775e-04
Loss = 2.9590e-03, PNorm = 151.2757, GNorm = 0.1103, lr_0 = 2.4758e-04
Loss = 4.4134e-03, PNorm = 151.2807, GNorm = 0.0726, lr_0 = 2.4741e-04
Loss = 2.6204e-03, PNorm = 151.2859, GNorm = 0.1236, lr_0 = 2.4724e-04
Loss = 3.0880e-03, PNorm = 151.2934, GNorm = 0.1277, lr_0 = 2.4707e-04
Validation mae = 0.477705
Epoch 19
Loss = 3.7321e-03, PNorm = 151.2970, GNorm = 0.0425, lr_0 = 2.4690e-04
Loss = 2.5399e-03, PNorm = 151.3011, GNorm = 0.1200, lr_0 = 2.4674e-04
Loss = 3.2373e-03, PNorm = 151.3063, GNorm = 0.0642, lr_0 = 2.4657e-04
Loss = 2.3632e-03, PNorm = 151.3114, GNorm = 0.2080, lr_0 = 2.4640e-04
Loss = 3.3568e-03, PNorm = 151.3164, GNorm = 0.0617, lr_0 = 2.4623e-04
Loss = 3.8992e-03, PNorm = 151.3226, GNorm = 0.0804, lr_0 = 2.4606e-04
Loss = 2.4715e-03, PNorm = 151.3272, GNorm = 0.1078, lr_0 = 2.4589e-04
Loss = 2.5515e-03, PNorm = 151.3291, GNorm = 0.2560, lr_0 = 2.4572e-04
Loss = 2.5866e-03, PNorm = 151.3327, GNorm = 0.2988, lr_0 = 2.4556e-04
Loss = 5.4788e-03, PNorm = 151.3391, GNorm = 0.0724, lr_0 = 2.4539e-04
Loss = 2.4035e-03, PNorm = 151.3445, GNorm = 0.2679, lr_0 = 2.4522e-04
Loss = 2.5557e-03, PNorm = 151.3501, GNorm = 0.0698, lr_0 = 2.4505e-04
Loss = 2.8219e-03, PNorm = 151.3543, GNorm = 0.2906, lr_0 = 2.4488e-04
Loss = 2.8742e-03, PNorm = 151.3596, GNorm = 0.3973, lr_0 = 2.4472e-04
Loss = 2.7422e-03, PNorm = 151.3659, GNorm = 0.1791, lr_0 = 2.4455e-04
Loss = 2.2851e-03, PNorm = 151.3725, GNorm = 0.1474, lr_0 = 2.4438e-04
Loss = 2.2203e-03, PNorm = 151.3783, GNorm = 0.0986, lr_0 = 2.4421e-04
Loss = 2.5248e-03, PNorm = 151.3837, GNorm = 0.0766, lr_0 = 2.4405e-04
Loss = 3.3688e-03, PNorm = 151.3887, GNorm = 0.0872, lr_0 = 2.4388e-04
Loss = 2.3635e-03, PNorm = 151.3930, GNorm = 0.1605, lr_0 = 2.4371e-04
Loss = 4.6417e-03, PNorm = 151.3974, GNorm = 0.2334, lr_0 = 2.4354e-04
Loss = 2.7111e-03, PNorm = 151.4019, GNorm = 0.1677, lr_0 = 2.4338e-04
Loss = 3.0583e-03, PNorm = 151.4080, GNorm = 0.0646, lr_0 = 2.4321e-04
Loss = 2.2518e-03, PNorm = 151.4135, GNorm = 0.0969, lr_0 = 2.4304e-04
Loss = 2.7239e-03, PNorm = 151.4190, GNorm = 0.1984, lr_0 = 2.4288e-04
Loss = 2.5236e-03, PNorm = 151.4237, GNorm = 0.0962, lr_0 = 2.4271e-04
Loss = 3.1523e-03, PNorm = 151.4302, GNorm = 0.1294, lr_0 = 2.4254e-04
Loss = 2.5387e-03, PNorm = 151.4351, GNorm = 0.1473, lr_0 = 2.4238e-04
Loss = 2.1766e-03, PNorm = 151.4419, GNorm = 0.0884, lr_0 = 2.4221e-04
Loss = 3.5014e-03, PNorm = 151.4472, GNorm = 0.2797, lr_0 = 2.4205e-04
Loss = 3.6475e-03, PNorm = 151.4555, GNorm = 0.1737, lr_0 = 2.4188e-04
Loss = 2.7043e-03, PNorm = 151.4625, GNorm = 0.1448, lr_0 = 2.4171e-04
Loss = 5.0814e-03, PNorm = 151.4641, GNorm = 0.1705, lr_0 = 2.4155e-04
Loss = 2.7786e-03, PNorm = 151.4657, GNorm = 0.1043, lr_0 = 2.4138e-04
Loss = 2.4985e-03, PNorm = 151.4708, GNorm = 0.1411, lr_0 = 2.4122e-04
Loss = 2.0790e-03, PNorm = 151.4762, GNorm = 0.1227, lr_0 = 2.4105e-04
Loss = 1.7752e-03, PNorm = 151.4807, GNorm = 0.1643, lr_0 = 2.4089e-04
Loss = 2.5653e-03, PNorm = 151.4841, GNorm = 0.0805, lr_0 = 2.4072e-04
Loss = 2.2204e-03, PNorm = 151.4877, GNorm = 0.0772, lr_0 = 2.4056e-04
Loss = 2.9378e-03, PNorm = 151.4926, GNorm = 0.1117, lr_0 = 2.4039e-04
Loss = 2.2325e-03, PNorm = 151.4997, GNorm = 0.0960, lr_0 = 2.4023e-04
Loss = 2.4110e-03, PNorm = 151.5047, GNorm = 0.0836, lr_0 = 2.4006e-04
Loss = 2.2801e-03, PNorm = 151.5099, GNorm = 0.1628, lr_0 = 2.3990e-04
Loss = 2.9045e-03, PNorm = 151.5157, GNorm = 0.1687, lr_0 = 2.3974e-04
Loss = 3.2885e-03, PNorm = 151.5203, GNorm = 0.3135, lr_0 = 2.3957e-04
Loss = 2.2691e-03, PNorm = 151.5263, GNorm = 0.2096, lr_0 = 2.3941e-04
Loss = 1.7429e-03, PNorm = 151.5331, GNorm = 0.0976, lr_0 = 2.3924e-04
Loss = 3.5963e-03, PNorm = 151.5420, GNorm = 0.0717, lr_0 = 2.3908e-04
Loss = 2.6927e-03, PNorm = 151.5472, GNorm = 0.0952, lr_0 = 2.3892e-04
Loss = 2.9591e-03, PNorm = 151.5522, GNorm = 0.2049, lr_0 = 2.3875e-04
Loss = 2.6295e-03, PNorm = 151.5581, GNorm = 0.0704, lr_0 = 2.3859e-04
Loss = 3.5067e-03, PNorm = 151.5643, GNorm = 0.1190, lr_0 = 2.3842e-04
Loss = 4.6596e-03, PNorm = 151.5744, GNorm = 0.0668, lr_0 = 2.3826e-04
Loss = 2.4503e-03, PNorm = 151.5809, GNorm = 0.1104, lr_0 = 2.3810e-04
Loss = 2.8594e-03, PNorm = 151.5841, GNorm = 0.2451, lr_0 = 2.3794e-04
Loss = 2.8857e-03, PNorm = 151.5903, GNorm = 0.1159, lr_0 = 2.3777e-04
Loss = 3.8808e-03, PNorm = 151.5965, GNorm = 0.1447, lr_0 = 2.3761e-04
Loss = 3.8526e-03, PNorm = 151.6002, GNorm = 0.2367, lr_0 = 2.3745e-04
Loss = 3.8798e-03, PNorm = 151.6073, GNorm = 0.2019, lr_0 = 2.3728e-04
Loss = 2.7481e-03, PNorm = 151.6150, GNorm = 0.0688, lr_0 = 2.3712e-04
Loss = 2.2327e-03, PNorm = 151.6199, GNorm = 0.1258, lr_0 = 2.3696e-04
Loss = 2.8011e-03, PNorm = 151.6291, GNorm = 0.1034, lr_0 = 2.3680e-04
Loss = 2.5968e-03, PNorm = 151.6361, GNorm = 0.1978, lr_0 = 2.3663e-04
Loss = 4.3049e-03, PNorm = 151.6436, GNorm = 0.0888, lr_0 = 2.3647e-04
Loss = 2.1821e-03, PNorm = 151.6515, GNorm = 0.0557, lr_0 = 2.3631e-04
Loss = 2.7138e-03, PNorm = 151.6568, GNorm = 0.1119, lr_0 = 2.3615e-04
Loss = 2.4047e-03, PNorm = 151.6616, GNorm = 0.2909, lr_0 = 2.3599e-04
Loss = 2.4930e-03, PNorm = 151.6665, GNorm = 0.0720, lr_0 = 2.3582e-04
Loss = 2.6163e-03, PNorm = 151.6746, GNorm = 0.1583, lr_0 = 2.3566e-04
Loss = 2.4759e-03, PNorm = 151.6820, GNorm = 0.1924, lr_0 = 2.3550e-04
Loss = 4.1503e-03, PNorm = 151.6872, GNorm = 0.1080, lr_0 = 2.3534e-04
Loss = 3.1614e-03, PNorm = 151.6958, GNorm = 0.0781, lr_0 = 2.3518e-04
Loss = 2.6066e-03, PNorm = 151.7011, GNorm = 0.1714, lr_0 = 2.3502e-04
Loss = 3.0492e-03, PNorm = 151.7075, GNorm = 0.0903, lr_0 = 2.3486e-04
Loss = 3.2046e-03, PNorm = 151.7109, GNorm = 0.3097, lr_0 = 2.3470e-04
Loss = 2.3727e-03, PNorm = 151.7178, GNorm = 0.2461, lr_0 = 2.3454e-04
Loss = 4.0763e-03, PNorm = 151.7208, GNorm = 0.1353, lr_0 = 2.3437e-04
Loss = 3.9643e-03, PNorm = 151.7250, GNorm = 0.2748, lr_0 = 2.3421e-04
Loss = 2.8488e-03, PNorm = 151.7299, GNorm = 0.1281, lr_0 = 2.3405e-04
Loss = 4.3945e-03, PNorm = 151.7344, GNorm = 0.1053, lr_0 = 2.3389e-04
Loss = 2.6669e-03, PNorm = 151.7428, GNorm = 0.1030, lr_0 = 2.3373e-04
Loss = 4.9532e-03, PNorm = 151.7502, GNorm = 0.1336, lr_0 = 2.3357e-04
Loss = 3.0082e-03, PNorm = 151.7557, GNorm = 0.1015, lr_0 = 2.3341e-04
Loss = 2.7800e-03, PNorm = 151.7601, GNorm = 0.1245, lr_0 = 2.3325e-04
Loss = 6.1990e-03, PNorm = 151.7626, GNorm = 0.1662, lr_0 = 2.3309e-04
Loss = 3.2437e-03, PNorm = 151.7680, GNorm = 0.1789, lr_0 = 2.3293e-04
Loss = 3.0339e-03, PNorm = 151.7755, GNorm = 0.1573, lr_0 = 2.3277e-04
Loss = 2.8543e-03, PNorm = 151.7785, GNorm = 0.1987, lr_0 = 2.3261e-04
Loss = 2.3506e-03, PNorm = 151.7860, GNorm = 0.1447, lr_0 = 2.3246e-04
Loss = 2.5695e-03, PNorm = 151.7921, GNorm = 0.2124, lr_0 = 2.3230e-04
Loss = 2.2932e-03, PNorm = 151.7978, GNorm = 0.1399, lr_0 = 2.3214e-04
Loss = 3.3770e-03, PNorm = 151.8035, GNorm = 0.1424, lr_0 = 2.3198e-04
Loss = 2.4303e-03, PNorm = 151.8117, GNorm = 0.1692, lr_0 = 2.3182e-04
Loss = 3.0718e-03, PNorm = 151.8189, GNorm = 0.0938, lr_0 = 2.3166e-04
Loss = 3.8284e-03, PNorm = 151.8269, GNorm = 0.0877, lr_0 = 2.3150e-04
Loss = 1.9868e-03, PNorm = 151.8344, GNorm = 0.0738, lr_0 = 2.3134e-04
Loss = 3.1183e-03, PNorm = 151.8399, GNorm = 0.1060, lr_0 = 2.3118e-04
Loss = 2.6406e-03, PNorm = 151.8458, GNorm = 0.3628, lr_0 = 2.3103e-04
Loss = 2.5565e-03, PNorm = 151.8538, GNorm = 0.2159, lr_0 = 2.3087e-04
Loss = 3.3070e-03, PNorm = 151.8576, GNorm = 0.0822, lr_0 = 2.3071e-04
Loss = 2.9432e-03, PNorm = 151.8635, GNorm = 0.3383, lr_0 = 2.3055e-04
Loss = 3.2938e-03, PNorm = 151.8726, GNorm = 0.2116, lr_0 = 2.3039e-04
Loss = 2.9992e-03, PNorm = 151.8776, GNorm = 0.1773, lr_0 = 2.3024e-04
Loss = 4.7859e-03, PNorm = 151.8830, GNorm = 0.3187, lr_0 = 2.3008e-04
Loss = 3.6870e-03, PNorm = 151.8839, GNorm = 0.1244, lr_0 = 2.2992e-04
Loss = 1.9662e-03, PNorm = 151.8884, GNorm = 0.1801, lr_0 = 2.2976e-04
Loss = 5.4332e-03, PNorm = 151.8967, GNorm = 0.2693, lr_0 = 2.2961e-04
Loss = 2.4915e-03, PNorm = 151.9041, GNorm = 0.0595, lr_0 = 2.2945e-04
Loss = 2.5399e-03, PNorm = 151.9114, GNorm = 0.0920, lr_0 = 2.2929e-04
Loss = 3.4886e-03, PNorm = 151.9154, GNorm = 0.3749, lr_0 = 2.2913e-04
Loss = 3.2562e-03, PNorm = 151.9158, GNorm = 0.1901, lr_0 = 2.2898e-04
Loss = 3.1183e-03, PNorm = 151.9190, GNorm = 0.1820, lr_0 = 2.2882e-04
Loss = 3.1750e-03, PNorm = 151.9272, GNorm = 0.0840, lr_0 = 2.2866e-04
Loss = 3.2970e-03, PNorm = 151.9340, GNorm = 0.0965, lr_0 = 2.2851e-04
Loss = 3.5468e-03, PNorm = 151.9408, GNorm = 0.1040, lr_0 = 2.2835e-04
Loss = 3.8587e-03, PNorm = 151.9476, GNorm = 0.2740, lr_0 = 2.2819e-04
Loss = 3.7381e-03, PNorm = 151.9546, GNorm = 0.3079, lr_0 = 2.2804e-04
Loss = 2.3852e-03, PNorm = 151.9590, GNorm = 0.0699, lr_0 = 2.2788e-04
Loss = 2.8522e-03, PNorm = 151.9664, GNorm = 0.3600, lr_0 = 2.2773e-04
Loss = 2.3398e-03, PNorm = 151.9712, GNorm = 0.0532, lr_0 = 2.2757e-04
Validation mae = 0.477177
Epoch 20
Loss = 2.0478e-03, PNorm = 151.9735, GNorm = 0.2278, lr_0 = 2.2741e-04
Loss = 2.1530e-03, PNorm = 151.9760, GNorm = 0.1528, lr_0 = 2.2726e-04
Loss = 2.1054e-03, PNorm = 151.9803, GNorm = 0.1585, lr_0 = 2.2710e-04
Loss = 3.4323e-03, PNorm = 151.9879, GNorm = 0.3056, lr_0 = 2.2695e-04
Loss = 3.8869e-03, PNorm = 151.9935, GNorm = 0.1529, lr_0 = 2.2679e-04
Loss = 3.2773e-03, PNorm = 151.9991, GNorm = 0.3430, lr_0 = 2.2664e-04
Loss = 2.1439e-03, PNorm = 152.0032, GNorm = 0.1421, lr_0 = 2.2648e-04
Loss = 2.5865e-03, PNorm = 152.0097, GNorm = 0.1998, lr_0 = 2.2632e-04
Loss = 2.3299e-03, PNorm = 152.0149, GNorm = 0.2301, lr_0 = 2.2617e-04
Loss = 2.2201e-03, PNorm = 152.0197, GNorm = 0.1908, lr_0 = 2.2601e-04
Loss = 2.3121e-03, PNorm = 152.0236, GNorm = 0.3043, lr_0 = 2.2586e-04
Loss = 1.8414e-03, PNorm = 152.0287, GNorm = 0.0758, lr_0 = 2.2571e-04
Loss = 1.8209e-03, PNorm = 152.0327, GNorm = 0.0413, lr_0 = 2.2555e-04
Loss = 3.6800e-03, PNorm = 152.0398, GNorm = 0.0700, lr_0 = 2.2540e-04
Loss = 2.9818e-03, PNorm = 152.0471, GNorm = 0.1780, lr_0 = 2.2524e-04
Loss = 3.5374e-03, PNorm = 152.0522, GNorm = 0.1043, lr_0 = 2.2509e-04
Loss = 2.7472e-03, PNorm = 152.0588, GNorm = 0.2316, lr_0 = 2.2493e-04
Loss = 3.1360e-03, PNorm = 152.0639, GNorm = 0.1170, lr_0 = 2.2478e-04
Loss = 2.3649e-03, PNorm = 152.0673, GNorm = 0.1727, lr_0 = 2.2463e-04
Loss = 2.1147e-03, PNorm = 152.0709, GNorm = 0.0878, lr_0 = 2.2447e-04
Loss = 2.2581e-03, PNorm = 152.0741, GNorm = 0.0596, lr_0 = 2.2432e-04
Loss = 1.8400e-03, PNorm = 152.0787, GNorm = 0.0730, lr_0 = 2.2416e-04
Loss = 2.7998e-03, PNorm = 152.0849, GNorm = 0.1444, lr_0 = 2.2401e-04
Loss = 1.8306e-03, PNorm = 152.0889, GNorm = 0.0921, lr_0 = 2.2386e-04
Loss = 2.7559e-03, PNorm = 152.0929, GNorm = 0.1321, lr_0 = 2.2370e-04
Loss = 2.2263e-03, PNorm = 152.0974, GNorm = 0.0723, lr_0 = 2.2355e-04
Loss = 2.7587e-03, PNorm = 152.1015, GNorm = 0.2645, lr_0 = 2.2340e-04
Loss = 2.6852e-03, PNorm = 152.1057, GNorm = 0.1447, lr_0 = 2.2324e-04
Loss = 3.0966e-03, PNorm = 152.1084, GNorm = 0.1600, lr_0 = 2.2309e-04
Loss = 2.5639e-03, PNorm = 152.1111, GNorm = 0.0724, lr_0 = 2.2294e-04
Loss = 3.0651e-03, PNorm = 152.1165, GNorm = 0.1685, lr_0 = 2.2279e-04
Loss = 3.0072e-03, PNorm = 152.1212, GNorm = 0.0597, lr_0 = 2.2263e-04
Loss = 3.4160e-03, PNorm = 152.1223, GNorm = 0.1906, lr_0 = 2.2248e-04
Loss = 2.0512e-03, PNorm = 152.1265, GNorm = 0.1102, lr_0 = 2.2233e-04
Loss = 3.6065e-03, PNorm = 152.1302, GNorm = 0.1788, lr_0 = 2.2218e-04
Loss = 2.5513e-03, PNorm = 152.1348, GNorm = 0.2178, lr_0 = 2.2202e-04
Loss = 2.1092e-03, PNorm = 152.1422, GNorm = 0.1951, lr_0 = 2.2187e-04
Loss = 1.9128e-03, PNorm = 152.1489, GNorm = 0.0775, lr_0 = 2.2172e-04
Loss = 2.1290e-03, PNorm = 152.1536, GNorm = 0.1119, lr_0 = 2.2157e-04
Loss = 2.0197e-03, PNorm = 152.1585, GNorm = 0.1087, lr_0 = 2.2142e-04
Loss = 2.4718e-03, PNorm = 152.1607, GNorm = 0.1298, lr_0 = 2.2126e-04
Loss = 2.2531e-03, PNorm = 152.1658, GNorm = 0.2894, lr_0 = 2.2111e-04
Loss = 2.3708e-03, PNorm = 152.1703, GNorm = 0.1282, lr_0 = 2.2096e-04
Loss = 2.6591e-03, PNorm = 152.1760, GNorm = 0.0545, lr_0 = 2.2081e-04
Loss = 2.4819e-03, PNorm = 152.1796, GNorm = 0.1267, lr_0 = 2.2066e-04
Loss = 3.4919e-03, PNorm = 152.1853, GNorm = 0.2620, lr_0 = 2.2051e-04
Loss = 2.5584e-03, PNorm = 152.1918, GNorm = 0.0577, lr_0 = 2.2036e-04
Loss = 2.0545e-03, PNorm = 152.1970, GNorm = 0.1059, lr_0 = 2.2021e-04
Loss = 2.1411e-03, PNorm = 152.2015, GNorm = 0.3157, lr_0 = 2.2005e-04
Loss = 2.4486e-03, PNorm = 152.2055, GNorm = 0.1749, lr_0 = 2.1990e-04
Loss = 6.1534e-03, PNorm = 152.2080, GNorm = 0.2731, lr_0 = 2.1975e-04
Loss = 2.0464e-03, PNorm = 152.2139, GNorm = 0.1953, lr_0 = 2.1960e-04
Loss = 3.4241e-03, PNorm = 152.2212, GNorm = 0.1889, lr_0 = 2.1945e-04
Loss = 2.0151e-03, PNorm = 152.2275, GNorm = 0.0536, lr_0 = 2.1930e-04
Loss = 2.4522e-03, PNorm = 152.2322, GNorm = 0.0951, lr_0 = 2.1915e-04
Loss = 2.0453e-03, PNorm = 152.2347, GNorm = 0.0578, lr_0 = 2.1900e-04
Loss = 3.7144e-03, PNorm = 152.2386, GNorm = 0.1007, lr_0 = 2.1885e-04
Loss = 2.7661e-03, PNorm = 152.2435, GNorm = 0.1175, lr_0 = 2.1870e-04
Loss = 2.5313e-03, PNorm = 152.2473, GNorm = 0.0923, lr_0 = 2.1855e-04
Loss = 2.0697e-03, PNorm = 152.2531, GNorm = 0.0867, lr_0 = 2.1840e-04
Loss = 1.8218e-03, PNorm = 152.2563, GNorm = 0.0498, lr_0 = 2.1825e-04
Loss = 1.9791e-03, PNorm = 152.2618, GNorm = 0.1115, lr_0 = 2.1810e-04
Loss = 2.2630e-03, PNorm = 152.2665, GNorm = 0.0873, lr_0 = 2.1795e-04
Loss = 2.2562e-03, PNorm = 152.2725, GNorm = 0.1371, lr_0 = 2.1780e-04
Loss = 1.8984e-03, PNorm = 152.2763, GNorm = 0.0643, lr_0 = 2.1765e-04
Loss = 1.7559e-03, PNorm = 152.2815, GNorm = 0.2244, lr_0 = 2.1751e-04
Loss = 2.2072e-03, PNorm = 152.2867, GNorm = 0.0463, lr_0 = 2.1736e-04
Loss = 2.0755e-03, PNorm = 152.2926, GNorm = 0.0736, lr_0 = 2.1721e-04
Loss = 3.3428e-03, PNorm = 152.2968, GNorm = 0.2849, lr_0 = 2.1706e-04
Loss = 3.9666e-03, PNorm = 152.3032, GNorm = 0.1023, lr_0 = 2.1691e-04
Loss = 2.9438e-03, PNorm = 152.3091, GNorm = 0.1011, lr_0 = 2.1676e-04
Loss = 2.0301e-03, PNorm = 152.3123, GNorm = 0.0578, lr_0 = 2.1661e-04
Loss = 1.7669e-03, PNorm = 152.3182, GNorm = 0.0745, lr_0 = 2.1646e-04
Loss = 2.1956e-03, PNorm = 152.3203, GNorm = 0.1604, lr_0 = 2.1632e-04
Loss = 2.7476e-03, PNorm = 152.3262, GNorm = 0.1463, lr_0 = 2.1617e-04
Loss = 3.0355e-03, PNorm = 152.3307, GNorm = 0.1390, lr_0 = 2.1602e-04
Loss = 5.4599e-03, PNorm = 152.3356, GNorm = 0.1767, lr_0 = 2.1587e-04
Loss = 2.4368e-03, PNorm = 152.3393, GNorm = 0.1203, lr_0 = 2.1572e-04
Loss = 3.3006e-03, PNorm = 152.3448, GNorm = 0.2439, lr_0 = 2.1558e-04
Loss = 2.0093e-03, PNorm = 152.3498, GNorm = 0.0930, lr_0 = 2.1543e-04
Loss = 3.3224e-03, PNorm = 152.3550, GNorm = 0.1006, lr_0 = 2.1528e-04
Loss = 1.9348e-03, PNorm = 152.3600, GNorm = 0.1883, lr_0 = 2.1513e-04
Loss = 2.1405e-03, PNorm = 152.3674, GNorm = 0.2844, lr_0 = 2.1499e-04
Loss = 2.3967e-03, PNorm = 152.3727, GNorm = 0.0931, lr_0 = 2.1484e-04
Loss = 2.0095e-03, PNorm = 152.3791, GNorm = 0.2595, lr_0 = 2.1469e-04
Loss = 2.2892e-03, PNorm = 152.3836, GNorm = 0.1482, lr_0 = 2.1454e-04
Loss = 1.8319e-03, PNorm = 152.3880, GNorm = 0.1192, lr_0 = 2.1440e-04
Loss = 1.6076e-03, PNorm = 152.3918, GNorm = 0.1014, lr_0 = 2.1425e-04
Loss = 3.0043e-03, PNorm = 152.3952, GNorm = 0.1190, lr_0 = 2.1410e-04
Loss = 2.5756e-03, PNorm = 152.3992, GNorm = 0.0715, lr_0 = 2.1396e-04
Loss = 1.9276e-03, PNorm = 152.4041, GNorm = 0.0855, lr_0 = 2.1381e-04
Loss = 3.4369e-03, PNorm = 152.4100, GNorm = 0.1300, lr_0 = 2.1366e-04
Loss = 1.7149e-03, PNorm = 152.4148, GNorm = 0.0628, lr_0 = 2.1352e-04
Loss = 3.1007e-03, PNorm = 152.4180, GNorm = 0.0757, lr_0 = 2.1337e-04
Loss = 2.0696e-03, PNorm = 152.4212, GNorm = 0.2369, lr_0 = 2.1323e-04
Loss = 3.3857e-03, PNorm = 152.4270, GNorm = 0.0842, lr_0 = 2.1308e-04
Loss = 4.7633e-03, PNorm = 152.4337, GNorm = 0.1809, lr_0 = 2.1293e-04
Loss = 1.8774e-03, PNorm = 152.4382, GNorm = 0.1571, lr_0 = 2.1279e-04
Loss = 5.4332e-03, PNorm = 152.4428, GNorm = 0.0500, lr_0 = 2.1264e-04
Loss = 5.3260e-03, PNorm = 152.4444, GNorm = 0.2757, lr_0 = 2.1250e-04
Loss = 2.2508e-03, PNorm = 152.4495, GNorm = 0.0939, lr_0 = 2.1235e-04
Loss = 4.0069e-03, PNorm = 152.4567, GNorm = 0.1022, lr_0 = 2.1221e-04
Loss = 6.4061e-03, PNorm = 152.4632, GNorm = 0.8567, lr_0 = 2.1206e-04
Loss = 2.1793e-03, PNorm = 152.4689, GNorm = 0.1639, lr_0 = 2.1191e-04
Loss = 2.2386e-03, PNorm = 152.4738, GNorm = 0.1181, lr_0 = 2.1177e-04
Loss = 1.5409e-03, PNorm = 152.4766, GNorm = 0.1053, lr_0 = 2.1162e-04
Loss = 2.8021e-03, PNorm = 152.4796, GNorm = 0.1918, lr_0 = 2.1148e-04
Loss = 2.5126e-03, PNorm = 152.4835, GNorm = 0.0478, lr_0 = 2.1133e-04
Loss = 3.2995e-03, PNorm = 152.4896, GNorm = 0.0435, lr_0 = 2.1119e-04
Loss = 2.5068e-03, PNorm = 152.4925, GNorm = 0.0665, lr_0 = 2.1104e-04
Loss = 2.6534e-03, PNorm = 152.4981, GNorm = 0.1740, lr_0 = 2.1090e-04
Loss = 3.8818e-03, PNorm = 152.5029, GNorm = 0.1905, lr_0 = 2.1076e-04
Loss = 1.8202e-03, PNorm = 152.5076, GNorm = 0.0552, lr_0 = 2.1061e-04
Loss = 4.2903e-03, PNorm = 152.5132, GNorm = 0.2404, lr_0 = 2.1047e-04
Loss = 2.3313e-03, PNorm = 152.5168, GNorm = 0.2606, lr_0 = 2.1032e-04
Loss = 2.9989e-03, PNorm = 152.5216, GNorm = 0.0859, lr_0 = 2.1018e-04
Loss = 2.7493e-03, PNorm = 152.5258, GNorm = 0.1350, lr_0 = 2.1003e-04
Loss = 2.7392e-03, PNorm = 152.5335, GNorm = 0.0991, lr_0 = 2.0989e-04
Loss = 4.1982e-03, PNorm = 152.5407, GNorm = 0.1222, lr_0 = 2.0975e-04
Loss = 2.2242e-03, PNorm = 152.5470, GNorm = 0.0791, lr_0 = 2.0960e-04
Validation mae = 0.477031
Epoch 21
Loss = 2.5368e-03, PNorm = 152.5515, GNorm = 0.1620, lr_0 = 2.0946e-04
Loss = 3.1034e-03, PNorm = 152.5547, GNorm = 0.2427, lr_0 = 2.0932e-04
Loss = 2.4318e-03, PNorm = 152.5567, GNorm = 0.0671, lr_0 = 2.0917e-04
Loss = 2.6514e-03, PNorm = 152.5595, GNorm = 0.0524, lr_0 = 2.0903e-04
Loss = 2.2076e-03, PNorm = 152.5652, GNorm = 0.0824, lr_0 = 2.0889e-04
Loss = 2.0768e-03, PNorm = 152.5696, GNorm = 0.1357, lr_0 = 2.0874e-04
Loss = 1.7525e-03, PNorm = 152.5721, GNorm = 0.1315, lr_0 = 2.0860e-04
Loss = 2.0248e-03, PNorm = 152.5734, GNorm = 0.1726, lr_0 = 2.0846e-04
Loss = 2.1854e-03, PNorm = 152.5772, GNorm = 0.2533, lr_0 = 2.0831e-04
Loss = 1.5956e-03, PNorm = 152.5815, GNorm = 0.0990, lr_0 = 2.0817e-04
Loss = 2.7493e-03, PNorm = 152.5871, GNorm = 0.2465, lr_0 = 2.0803e-04
Loss = 1.5423e-03, PNorm = 152.5912, GNorm = 0.2098, lr_0 = 2.0789e-04
Loss = 1.8111e-03, PNorm = 152.5967, GNorm = 0.1533, lr_0 = 2.0774e-04
Loss = 2.5310e-03, PNorm = 152.6012, GNorm = 0.1041, lr_0 = 2.0760e-04
Loss = 1.7626e-03, PNorm = 152.6065, GNorm = 0.0536, lr_0 = 2.0746e-04
Loss = 2.3955e-03, PNorm = 152.6114, GNorm = 0.0965, lr_0 = 2.0732e-04
Loss = 2.9939e-03, PNorm = 152.6149, GNorm = 0.1555, lr_0 = 2.0718e-04
Loss = 1.8379e-03, PNorm = 152.6189, GNorm = 0.0630, lr_0 = 2.0703e-04
Loss = 2.1268e-03, PNorm = 152.6235, GNorm = 0.1449, lr_0 = 2.0689e-04
Loss = 1.5560e-03, PNorm = 152.6259, GNorm = 0.1919, lr_0 = 2.0675e-04
Loss = 2.5550e-03, PNorm = 152.6299, GNorm = 0.2213, lr_0 = 2.0661e-04
Loss = 3.5225e-03, PNorm = 152.6343, GNorm = 0.2291, lr_0 = 2.0647e-04
Loss = 2.0874e-03, PNorm = 152.6374, GNorm = 0.4630, lr_0 = 2.0633e-04
Loss = 1.8807e-03, PNorm = 152.6407, GNorm = 0.1057, lr_0 = 2.0618e-04
Loss = 2.0206e-03, PNorm = 152.6442, GNorm = 0.3872, lr_0 = 2.0604e-04
Loss = 1.9344e-03, PNorm = 152.6464, GNorm = 0.0767, lr_0 = 2.0590e-04
Loss = 2.1978e-03, PNorm = 152.6532, GNorm = 0.3629, lr_0 = 2.0576e-04
Loss = 2.2639e-03, PNorm = 152.6603, GNorm = 0.0771, lr_0 = 2.0562e-04
Loss = 2.1494e-03, PNorm = 152.6630, GNorm = 0.0985, lr_0 = 2.0548e-04
Loss = 1.8477e-03, PNorm = 152.6655, GNorm = 0.1751, lr_0 = 2.0534e-04
Loss = 1.9001e-03, PNorm = 152.6678, GNorm = 0.0892, lr_0 = 2.0520e-04
Loss = 1.5300e-03, PNorm = 152.6712, GNorm = 0.0726, lr_0 = 2.0506e-04
Loss = 2.6128e-03, PNorm = 152.6751, GNorm = 0.2874, lr_0 = 2.0492e-04
Loss = 1.7104e-03, PNorm = 152.6775, GNorm = 0.2660, lr_0 = 2.0478e-04
Loss = 1.8536e-03, PNorm = 152.6810, GNorm = 0.0480, lr_0 = 2.0464e-04
Loss = 2.4524e-03, PNorm = 152.6864, GNorm = 0.1329, lr_0 = 2.0450e-04
Loss = 1.6307e-03, PNorm = 152.6906, GNorm = 0.1388, lr_0 = 2.0436e-04
Loss = 2.2919e-03, PNorm = 152.6942, GNorm = 0.1478, lr_0 = 2.0422e-04
Loss = 4.4950e-03, PNorm = 152.6976, GNorm = 0.1701, lr_0 = 2.0408e-04
Loss = 1.7151e-03, PNorm = 152.7004, GNorm = 0.1371, lr_0 = 2.0394e-04
Loss = 1.9714e-03, PNorm = 152.7043, GNorm = 0.0561, lr_0 = 2.0380e-04
Loss = 2.1340e-03, PNorm = 152.7089, GNorm = 0.0520, lr_0 = 2.0366e-04
Loss = 1.6634e-03, PNorm = 152.7143, GNorm = 0.0613, lr_0 = 2.0352e-04
Loss = 1.9339e-03, PNorm = 152.7172, GNorm = 0.2812, lr_0 = 2.0338e-04
Loss = 2.0313e-03, PNorm = 152.7204, GNorm = 0.0700, lr_0 = 2.0324e-04
Loss = 6.1716e-03, PNorm = 152.7217, GNorm = 0.0926, lr_0 = 2.0310e-04
Loss = 2.5309e-03, PNorm = 152.7241, GNorm = 0.1666, lr_0 = 2.0296e-04
Loss = 1.2162e-03, PNorm = 152.7287, GNorm = 0.1905, lr_0 = 2.0282e-04
Loss = 3.2674e-03, PNorm = 152.7311, GNorm = 0.0897, lr_0 = 2.0268e-04
Loss = 1.5201e-03, PNorm = 152.7339, GNorm = 0.1520, lr_0 = 2.0254e-04
Loss = 3.3101e-03, PNorm = 152.7372, GNorm = 0.0875, lr_0 = 2.0240e-04
Loss = 2.7453e-03, PNorm = 152.7411, GNorm = 0.1705, lr_0 = 2.0227e-04
Loss = 1.7164e-03, PNorm = 152.7469, GNorm = 0.1632, lr_0 = 2.0213e-04
Loss = 2.0862e-03, PNorm = 152.7508, GNorm = 0.0801, lr_0 = 2.0199e-04
Loss = 2.6759e-03, PNorm = 152.7556, GNorm = 0.0842, lr_0 = 2.0185e-04
Loss = 2.5021e-03, PNorm = 152.7613, GNorm = 0.1415, lr_0 = 2.0171e-04
Loss = 2.8931e-03, PNorm = 152.7675, GNorm = 0.1499, lr_0 = 2.0157e-04
Loss = 2.0086e-03, PNorm = 152.7730, GNorm = 0.1285, lr_0 = 2.0144e-04
Loss = 1.6681e-03, PNorm = 152.7794, GNorm = 0.0567, lr_0 = 2.0130e-04
Loss = 2.0416e-03, PNorm = 152.7823, GNorm = 0.0659, lr_0 = 2.0116e-04
Loss = 2.4839e-03, PNorm = 152.7853, GNorm = 0.2129, lr_0 = 2.0102e-04
Loss = 3.1358e-03, PNorm = 152.7898, GNorm = 0.1798, lr_0 = 2.0088e-04
Loss = 2.3643e-03, PNorm = 152.7962, GNorm = 0.0946, lr_0 = 2.0075e-04
Loss = 2.2557e-03, PNorm = 152.8021, GNorm = 0.0400, lr_0 = 2.0061e-04
Loss = 3.8662e-03, PNorm = 152.8074, GNorm = 0.0883, lr_0 = 2.0047e-04
Loss = 1.7584e-03, PNorm = 152.8116, GNorm = 0.1806, lr_0 = 2.0033e-04
Loss = 1.6322e-03, PNorm = 152.8136, GNorm = 0.0632, lr_0 = 2.0020e-04
Loss = 2.5270e-03, PNorm = 152.8186, GNorm = 0.1484, lr_0 = 2.0006e-04
Loss = 2.1577e-03, PNorm = 152.8242, GNorm = 0.2919, lr_0 = 1.9992e-04
Loss = 1.7084e-03, PNorm = 152.8290, GNorm = 0.1849, lr_0 = 1.9979e-04
Loss = 1.7394e-03, PNorm = 152.8340, GNorm = 0.2659, lr_0 = 1.9965e-04
Loss = 2.1204e-03, PNorm = 152.8381, GNorm = 0.0989, lr_0 = 1.9951e-04
Loss = 1.9211e-03, PNorm = 152.8403, GNorm = 0.1305, lr_0 = 1.9938e-04
Loss = 2.7078e-03, PNorm = 152.8424, GNorm = 0.0412, lr_0 = 1.9924e-04
Loss = 2.6232e-03, PNorm = 152.8461, GNorm = 0.0878, lr_0 = 1.9910e-04
Loss = 2.4503e-03, PNorm = 152.8525, GNorm = 0.0961, lr_0 = 1.9897e-04
Loss = 1.6184e-03, PNorm = 152.8580, GNorm = 0.1248, lr_0 = 1.9883e-04
Loss = 2.2180e-03, PNorm = 152.8618, GNorm = 0.1793, lr_0 = 1.9869e-04
Loss = 1.3908e-03, PNorm = 152.8668, GNorm = 0.1103, lr_0 = 1.9856e-04
Loss = 2.1460e-03, PNorm = 152.8690, GNorm = 0.0838, lr_0 = 1.9842e-04
Loss = 4.7556e-03, PNorm = 152.8718, GNorm = 0.1306, lr_0 = 1.9829e-04
Loss = 2.3828e-03, PNorm = 152.8764, GNorm = 0.2368, lr_0 = 1.9815e-04
Loss = 3.5415e-03, PNorm = 152.8825, GNorm = 0.0751, lr_0 = 1.9801e-04
Loss = 2.3351e-03, PNorm = 152.8876, GNorm = 0.1244, lr_0 = 1.9788e-04
Loss = 1.8569e-03, PNorm = 152.8917, GNorm = 0.0827, lr_0 = 1.9774e-04
Loss = 3.0186e-03, PNorm = 152.8963, GNorm = 0.0759, lr_0 = 1.9761e-04
Loss = 2.1112e-03, PNorm = 152.9015, GNorm = 0.2643, lr_0 = 1.9747e-04
Loss = 2.0221e-03, PNorm = 152.9041, GNorm = 0.0742, lr_0 = 1.9734e-04
Loss = 1.7880e-03, PNorm = 152.9075, GNorm = 0.1386, lr_0 = 1.9720e-04
Loss = 1.7955e-03, PNorm = 152.9110, GNorm = 0.2790, lr_0 = 1.9707e-04
Loss = 1.9138e-03, PNorm = 152.9157, GNorm = 0.1288, lr_0 = 1.9693e-04
Loss = 1.5521e-03, PNorm = 152.9194, GNorm = 0.0679, lr_0 = 1.9680e-04
Loss = 2.6123e-03, PNorm = 152.9230, GNorm = 0.1566, lr_0 = 1.9666e-04
Loss = 1.9965e-03, PNorm = 152.9308, GNorm = 0.0874, lr_0 = 1.9653e-04
Loss = 1.4943e-03, PNorm = 152.9362, GNorm = 0.1312, lr_0 = 1.9639e-04
Loss = 1.2830e-03, PNorm = 152.9395, GNorm = 0.1020, lr_0 = 1.9626e-04
Loss = 1.3751e-03, PNorm = 152.9435, GNorm = 0.1704, lr_0 = 1.9612e-04
Loss = 2.0902e-03, PNorm = 152.9477, GNorm = 0.1236, lr_0 = 1.9599e-04
Loss = 3.7426e-03, PNorm = 152.9507, GNorm = 0.1297, lr_0 = 1.9585e-04
Loss = 3.4798e-03, PNorm = 152.9549, GNorm = 0.1248, lr_0 = 1.9572e-04
Loss = 1.7985e-03, PNorm = 152.9573, GNorm = 0.0927, lr_0 = 1.9559e-04
Loss = 2.3066e-03, PNorm = 152.9622, GNorm = 0.0793, lr_0 = 1.9545e-04
Loss = 3.9386e-03, PNorm = 152.9654, GNorm = 0.1039, lr_0 = 1.9532e-04
Loss = 1.7034e-03, PNorm = 152.9685, GNorm = 0.1338, lr_0 = 1.9518e-04
Loss = 1.9709e-03, PNorm = 152.9725, GNorm = 0.1348, lr_0 = 1.9505e-04
Loss = 3.7050e-03, PNorm = 152.9770, GNorm = 0.1508, lr_0 = 1.9492e-04
Loss = 2.9122e-03, PNorm = 152.9829, GNorm = 0.0613, lr_0 = 1.9478e-04
Loss = 2.1718e-03, PNorm = 152.9889, GNorm = 0.1836, lr_0 = 1.9465e-04
Loss = 5.1922e-03, PNorm = 152.9905, GNorm = 0.2039, lr_0 = 1.9452e-04
Loss = 2.0099e-03, PNorm = 152.9941, GNorm = 0.1088, lr_0 = 1.9438e-04
Loss = 2.5933e-03, PNorm = 152.9977, GNorm = 0.1871, lr_0 = 1.9425e-04
Loss = 2.4207e-03, PNorm = 153.0004, GNorm = 0.0664, lr_0 = 1.9412e-04
Loss = 4.1203e-03, PNorm = 153.0031, GNorm = 0.0787, lr_0 = 1.9398e-04
Loss = 2.2637e-03, PNorm = 153.0066, GNorm = 0.0757, lr_0 = 1.9385e-04
Loss = 2.3029e-03, PNorm = 153.0100, GNorm = 0.0883, lr_0 = 1.9372e-04
Loss = 1.9045e-03, PNorm = 153.0138, GNorm = 0.1069, lr_0 = 1.9359e-04
Loss = 1.9092e-03, PNorm = 153.0173, GNorm = 0.0758, lr_0 = 1.9345e-04
Loss = 3.6538e-03, PNorm = 153.0239, GNorm = 0.1345, lr_0 = 1.9332e-04
Loss = 1.9359e-03, PNorm = 153.0291, GNorm = 0.1560, lr_0 = 1.9319e-04
Loss = 4.6840e-03, PNorm = 153.0322, GNorm = 0.2903, lr_0 = 1.9306e-04
Validation mae = 0.479436
Epoch 22
Loss = 2.7198e-03, PNorm = 153.0355, GNorm = 0.1519, lr_0 = 1.9292e-04
Loss = 1.7506e-03, PNorm = 153.0376, GNorm = 0.2085, lr_0 = 1.9279e-04
Loss = 1.4547e-03, PNorm = 153.0400, GNorm = 0.0577, lr_0 = 1.9266e-04
Loss = 1.4497e-03, PNorm = 153.0407, GNorm = 0.2141, lr_0 = 1.9253e-04
Loss = 1.7491e-03, PNorm = 153.0425, GNorm = 0.0643, lr_0 = 1.9240e-04
Loss = 2.7156e-03, PNorm = 153.0466, GNorm = 0.3490, lr_0 = 1.9226e-04
Loss = 2.0798e-03, PNorm = 153.0476, GNorm = 0.0767, lr_0 = 1.9213e-04
Loss = 2.3459e-03, PNorm = 153.0487, GNorm = 0.0695, lr_0 = 1.9200e-04
Loss = 1.5287e-03, PNorm = 153.0528, GNorm = 0.1394, lr_0 = 1.9187e-04
Loss = 1.5281e-03, PNorm = 153.0578, GNorm = 0.2094, lr_0 = 1.9174e-04
Loss = 1.5899e-03, PNorm = 153.0628, GNorm = 0.2808, lr_0 = 1.9161e-04
Loss = 1.6230e-03, PNorm = 153.0668, GNorm = 0.1794, lr_0 = 1.9148e-04
Loss = 1.3698e-03, PNorm = 153.0721, GNorm = 0.1278, lr_0 = 1.9134e-04
Loss = 1.6917e-03, PNorm = 153.0737, GNorm = 0.1113, lr_0 = 1.9121e-04
Loss = 4.8446e-03, PNorm = 153.0765, GNorm = 0.2901, lr_0 = 1.9108e-04
Loss = 1.4637e-03, PNorm = 153.0783, GNorm = 0.1433, lr_0 = 1.9095e-04
Loss = 1.8511e-03, PNorm = 153.0797, GNorm = 0.0360, lr_0 = 1.9082e-04
Loss = 1.4614e-03, PNorm = 153.0838, GNorm = 0.1001, lr_0 = 1.9069e-04
Loss = 2.0186e-03, PNorm = 153.0853, GNorm = 0.1671, lr_0 = 1.9056e-04
Loss = 1.4336e-03, PNorm = 153.0885, GNorm = 0.0857, lr_0 = 1.9043e-04
Loss = 1.8291e-03, PNorm = 153.0892, GNorm = 0.0880, lr_0 = 1.9030e-04
Loss = 1.5668e-03, PNorm = 153.0938, GNorm = 0.1604, lr_0 = 1.9017e-04
Loss = 1.9411e-03, PNorm = 153.0962, GNorm = 0.1646, lr_0 = 1.9004e-04
Loss = 2.1652e-03, PNorm = 153.1033, GNorm = 0.0560, lr_0 = 1.8991e-04
Loss = 2.1579e-03, PNorm = 153.1092, GNorm = 0.1207, lr_0 = 1.8978e-04
Loss = 2.4813e-03, PNorm = 153.1124, GNorm = 0.3154, lr_0 = 1.8965e-04
Loss = 1.4990e-03, PNorm = 153.1138, GNorm = 0.0523, lr_0 = 1.8952e-04
Loss = 1.8560e-03, PNorm = 153.1171, GNorm = 0.1962, lr_0 = 1.8939e-04
Loss = 2.3298e-03, PNorm = 153.1218, GNorm = 0.1059, lr_0 = 1.8926e-04
Loss = 1.3768e-03, PNorm = 153.1282, GNorm = 0.0943, lr_0 = 1.8913e-04
Loss = 1.8010e-03, PNorm = 153.1310, GNorm = 0.0735, lr_0 = 1.8900e-04
Loss = 2.0198e-03, PNorm = 153.1328, GNorm = 0.3055, lr_0 = 1.8887e-04
Loss = 1.4938e-03, PNorm = 153.1352, GNorm = 0.0388, lr_0 = 1.8874e-04
Loss = 2.5575e-03, PNorm = 153.1377, GNorm = 0.0707, lr_0 = 1.8861e-04
Loss = 1.9067e-03, PNorm = 153.1388, GNorm = 0.4288, lr_0 = 1.8848e-04
Loss = 1.6822e-03, PNorm = 153.1416, GNorm = 0.0505, lr_0 = 1.8835e-04
Loss = 1.4164e-03, PNorm = 153.1466, GNorm = 0.1170, lr_0 = 1.8822e-04
Loss = 1.3160e-03, PNorm = 153.1516, GNorm = 0.0518, lr_0 = 1.8809e-04
Loss = 4.1821e-03, PNorm = 153.1536, GNorm = 0.4192, lr_0 = 1.8797e-04
Loss = 2.3047e-03, PNorm = 153.1584, GNorm = 0.1597, lr_0 = 1.8784e-04
Loss = 1.5756e-03, PNorm = 153.1612, GNorm = 0.1135, lr_0 = 1.8771e-04
Loss = 1.6044e-03, PNorm = 153.1669, GNorm = 0.0452, lr_0 = 1.8758e-04
Loss = 3.6027e-03, PNorm = 153.1708, GNorm = 0.1744, lr_0 = 1.8745e-04
Loss = 2.1775e-03, PNorm = 153.1750, GNorm = 0.2290, lr_0 = 1.8732e-04
Loss = 1.7414e-03, PNorm = 153.1788, GNorm = 0.0589, lr_0 = 1.8719e-04
Loss = 2.2529e-03, PNorm = 153.1824, GNorm = 0.4044, lr_0 = 1.8707e-04
Loss = 1.6245e-03, PNorm = 153.1853, GNorm = 0.0846, lr_0 = 1.8694e-04
Loss = 1.8557e-03, PNorm = 153.1905, GNorm = 0.2010, lr_0 = 1.8681e-04
Loss = 1.2043e-03, PNorm = 153.1942, GNorm = 0.0838, lr_0 = 1.8668e-04
Loss = 2.6054e-03, PNorm = 153.1981, GNorm = 0.0599, lr_0 = 1.8655e-04
Loss = 4.1736e-03, PNorm = 153.2007, GNorm = 0.1106, lr_0 = 1.8643e-04
Loss = 1.8036e-03, PNorm = 153.2029, GNorm = 0.0534, lr_0 = 1.8630e-04
Loss = 2.7447e-03, PNorm = 153.2048, GNorm = 0.0639, lr_0 = 1.8617e-04
Loss = 1.8120e-03, PNorm = 153.2068, GNorm = 0.1823, lr_0 = 1.8604e-04
Loss = 1.8814e-03, PNorm = 153.2101, GNorm = 0.1165, lr_0 = 1.8592e-04
Loss = 2.1279e-03, PNorm = 153.2135, GNorm = 0.0858, lr_0 = 1.8579e-04
Loss = 2.2147e-03, PNorm = 153.2167, GNorm = 0.0662, lr_0 = 1.8566e-04
Loss = 2.1774e-03, PNorm = 153.2198, GNorm = 0.0653, lr_0 = 1.8553e-04
Loss = 5.2676e-03, PNorm = 153.2225, GNorm = 0.0563, lr_0 = 1.8541e-04
Loss = 1.8744e-03, PNorm = 153.2281, GNorm = 0.1460, lr_0 = 1.8528e-04
Loss = 1.7172e-03, PNorm = 153.2310, GNorm = 0.1877, lr_0 = 1.8515e-04
Loss = 4.4638e-03, PNorm = 153.2365, GNorm = 0.1044, lr_0 = 1.8503e-04
Loss = 1.2382e-03, PNorm = 153.2398, GNorm = 0.0845, lr_0 = 1.8490e-04
Loss = 1.6739e-03, PNorm = 153.2417, GNorm = 0.1110, lr_0 = 1.8477e-04
Loss = 3.0320e-03, PNorm = 153.2417, GNorm = 0.1416, lr_0 = 1.8465e-04
Loss = 1.6728e-03, PNorm = 153.2447, GNorm = 0.1182, lr_0 = 1.8452e-04
Loss = 2.0536e-03, PNorm = 153.2477, GNorm = 0.2464, lr_0 = 1.8439e-04
Loss = 2.1668e-03, PNorm = 153.2504, GNorm = 0.1083, lr_0 = 1.8427e-04
Loss = 1.3699e-03, PNorm = 153.2518, GNorm = 0.1391, lr_0 = 1.8414e-04
Loss = 1.9073e-03, PNorm = 153.2549, GNorm = 0.2092, lr_0 = 1.8401e-04
Loss = 1.4511e-03, PNorm = 153.2597, GNorm = 0.0561, lr_0 = 1.8389e-04
Loss = 1.9744e-03, PNorm = 153.2646, GNorm = 0.1508, lr_0 = 1.8376e-04
Loss = 3.3987e-03, PNorm = 153.2700, GNorm = 0.2919, lr_0 = 1.8364e-04
Loss = 2.1051e-03, PNorm = 153.2750, GNorm = 0.0775, lr_0 = 1.8351e-04
Loss = 2.0988e-03, PNorm = 153.2832, GNorm = 0.1286, lr_0 = 1.8338e-04
Loss = 3.7505e-03, PNorm = 153.2880, GNorm = 0.4016, lr_0 = 1.8326e-04
Loss = 2.0765e-03, PNorm = 153.2932, GNorm = 0.0590, lr_0 = 1.8313e-04
Loss = 4.6650e-03, PNorm = 153.2962, GNorm = 0.3935, lr_0 = 1.8301e-04
Loss = 1.8740e-03, PNorm = 153.3022, GNorm = 0.2063, lr_0 = 1.8288e-04
Loss = 2.4604e-03, PNorm = 153.3043, GNorm = 0.2103, lr_0 = 1.8276e-04
Loss = 1.5011e-03, PNorm = 153.3085, GNorm = 0.0904, lr_0 = 1.8263e-04
Loss = 1.2233e-03, PNorm = 153.3102, GNorm = 0.0696, lr_0 = 1.8251e-04
Loss = 1.5410e-03, PNorm = 153.3131, GNorm = 0.1273, lr_0 = 1.8238e-04
Loss = 1.8846e-03, PNorm = 153.3183, GNorm = 0.0813, lr_0 = 1.8226e-04
Loss = 2.1656e-03, PNorm = 153.3212, GNorm = 0.2362, lr_0 = 1.8213e-04
Loss = 1.8706e-03, PNorm = 153.3223, GNorm = 0.0607, lr_0 = 1.8201e-04
Loss = 1.9076e-03, PNorm = 153.3226, GNorm = 0.1220, lr_0 = 1.8188e-04
Loss = 1.5583e-03, PNorm = 153.3273, GNorm = 0.0823, lr_0 = 1.8176e-04
Loss = 1.3466e-03, PNorm = 153.3324, GNorm = 0.0932, lr_0 = 1.8163e-04
Loss = 1.4672e-03, PNorm = 153.3362, GNorm = 0.0640, lr_0 = 1.8151e-04
Loss = 1.9966e-03, PNorm = 153.3413, GNorm = 0.0703, lr_0 = 1.8138e-04
Loss = 3.2027e-03, PNorm = 153.3442, GNorm = 0.1595, lr_0 = 1.8126e-04
Loss = 2.4797e-03, PNorm = 153.3482, GNorm = 0.0873, lr_0 = 1.8114e-04
Loss = 2.0362e-03, PNorm = 153.3524, GNorm = 0.1776, lr_0 = 1.8101e-04
Loss = 1.3079e-03, PNorm = 153.3551, GNorm = 0.0923, lr_0 = 1.8089e-04
Loss = 3.5577e-03, PNorm = 153.3578, GNorm = 0.2270, lr_0 = 1.8076e-04
Loss = 1.1965e-03, PNorm = 153.3620, GNorm = 0.0927, lr_0 = 1.8064e-04
Loss = 1.2981e-03, PNorm = 153.3666, GNorm = 0.1168, lr_0 = 1.8052e-04
Loss = 1.6430e-03, PNorm = 153.3716, GNorm = 0.1756, lr_0 = 1.8039e-04
Loss = 1.6877e-03, PNorm = 153.3760, GNorm = 0.1209, lr_0 = 1.8027e-04
Loss = 1.8643e-03, PNorm = 153.3791, GNorm = 0.0419, lr_0 = 1.8015e-04
Loss = 1.6103e-03, PNorm = 153.3825, GNorm = 0.1131, lr_0 = 1.8002e-04
Loss = 3.0027e-03, PNorm = 153.3871, GNorm = 0.0686, lr_0 = 1.7990e-04
Loss = 3.3406e-03, PNorm = 153.3911, GNorm = 0.0501, lr_0 = 1.7978e-04
Loss = 3.8676e-03, PNorm = 153.3949, GNorm = 0.1170, lr_0 = 1.7965e-04
Loss = 2.6401e-03, PNorm = 153.3962, GNorm = 0.1393, lr_0 = 1.7953e-04
Loss = 1.5265e-03, PNorm = 153.4001, GNorm = 0.2171, lr_0 = 1.7941e-04
Loss = 1.7541e-03, PNorm = 153.4026, GNorm = 0.1136, lr_0 = 1.7928e-04
Loss = 1.2221e-03, PNorm = 153.4050, GNorm = 0.0712, lr_0 = 1.7916e-04
Loss = 1.6255e-03, PNorm = 153.4104, GNorm = 0.1841, lr_0 = 1.7904e-04
Loss = 2.6084e-03, PNorm = 153.4120, GNorm = 0.0786, lr_0 = 1.7892e-04
Loss = 1.4827e-03, PNorm = 153.4156, GNorm = 0.0470, lr_0 = 1.7879e-04
Loss = 3.5540e-03, PNorm = 153.4201, GNorm = 0.2540, lr_0 = 1.7867e-04
Loss = 1.2938e-03, PNorm = 153.4242, GNorm = 0.1105, lr_0 = 1.7855e-04
Loss = 1.8909e-03, PNorm = 153.4284, GNorm = 0.2434, lr_0 = 1.7843e-04
Loss = 3.9756e-03, PNorm = 153.4341, GNorm = 0.2267, lr_0 = 1.7830e-04
Loss = 1.7892e-03, PNorm = 153.4397, GNorm = 0.2054, lr_0 = 1.7818e-04
Loss = 1.2896e-03, PNorm = 153.4460, GNorm = 0.2303, lr_0 = 1.7806e-04
Loss = 1.3565e-03, PNorm = 153.4484, GNorm = 0.1286, lr_0 = 1.7794e-04
Loss = 2.6360e-03, PNorm = 153.4517, GNorm = 0.0792, lr_0 = 1.7782e-04
Validation mae = 0.477579
Epoch 23
Loss = 1.6677e-03, PNorm = 153.4571, GNorm = 0.1182, lr_0 = 1.7769e-04
Loss = 2.8360e-03, PNorm = 153.4584, GNorm = 0.1555, lr_0 = 1.7757e-04
Loss = 1.3127e-03, PNorm = 153.4595, GNorm = 0.1370, lr_0 = 1.7745e-04
Loss = 1.3791e-03, PNorm = 153.4601, GNorm = 0.0535, lr_0 = 1.7733e-04
Loss = 1.3700e-03, PNorm = 153.4625, GNorm = 0.1048, lr_0 = 1.7721e-04
Loss = 1.3939e-03, PNorm = 153.4648, GNorm = 0.0813, lr_0 = 1.7709e-04
Loss = 1.2613e-03, PNorm = 153.4661, GNorm = 0.0894, lr_0 = 1.7696e-04
Loss = 2.0482e-03, PNorm = 153.4674, GNorm = 0.1024, lr_0 = 1.7684e-04
Loss = 1.5464e-03, PNorm = 153.4676, GNorm = 0.0883, lr_0 = 1.7672e-04
Loss = 1.3790e-03, PNorm = 153.4723, GNorm = 0.1599, lr_0 = 1.7660e-04
Loss = 2.1231e-03, PNorm = 153.4758, GNorm = 0.1009, lr_0 = 1.7648e-04
Loss = 3.7893e-03, PNorm = 153.4785, GNorm = 0.1362, lr_0 = 1.7636e-04
Loss = 1.2084e-03, PNorm = 153.4831, GNorm = 0.1478, lr_0 = 1.7624e-04
Loss = 1.5434e-03, PNorm = 153.4879, GNorm = 0.2651, lr_0 = 1.7612e-04
Loss = 1.0657e-03, PNorm = 153.4884, GNorm = 0.0965, lr_0 = 1.7600e-04
Loss = 2.6637e-03, PNorm = 153.4905, GNorm = 0.1005, lr_0 = 1.7588e-04
Loss = 4.5820e-03, PNorm = 153.4921, GNorm = 0.1023, lr_0 = 1.7576e-04
Loss = 1.5549e-03, PNorm = 153.4941, GNorm = 0.1302, lr_0 = 1.7564e-04
Loss = 1.1769e-03, PNorm = 153.4985, GNorm = 0.1340, lr_0 = 1.7552e-04
Loss = 1.1596e-03, PNorm = 153.5006, GNorm = 0.1491, lr_0 = 1.7540e-04
Loss = 1.4264e-03, PNorm = 153.5045, GNorm = 0.0841, lr_0 = 1.7528e-04
Loss = 1.7981e-03, PNorm = 153.5087, GNorm = 0.0997, lr_0 = 1.7516e-04
Loss = 1.5277e-03, PNorm = 153.5108, GNorm = 0.0942, lr_0 = 1.7504e-04
Loss = 2.3260e-03, PNorm = 153.5125, GNorm = 0.1574, lr_0 = 1.7492e-04
Loss = 1.9087e-03, PNorm = 153.5177, GNorm = 0.0687, lr_0 = 1.7480e-04
Loss = 1.1709e-03, PNorm = 153.5206, GNorm = 0.1355, lr_0 = 1.7468e-04
Loss = 1.7552e-03, PNorm = 153.5243, GNorm = 0.1740, lr_0 = 1.7456e-04
Loss = 1.4606e-03, PNorm = 153.5270, GNorm = 0.0583, lr_0 = 1.7444e-04
Loss = 1.7806e-03, PNorm = 153.5292, GNorm = 0.1327, lr_0 = 1.7432e-04
Loss = 1.0937e-03, PNorm = 153.5334, GNorm = 0.1171, lr_0 = 1.7420e-04
Loss = 1.7676e-03, PNorm = 153.5387, GNorm = 0.0623, lr_0 = 1.7408e-04
Loss = 1.6185e-03, PNorm = 153.5434, GNorm = 0.0571, lr_0 = 1.7396e-04
Loss = 1.6231e-03, PNorm = 153.5435, GNorm = 0.1142, lr_0 = 1.7384e-04
Loss = 3.0548e-03, PNorm = 153.5478, GNorm = 0.2489, lr_0 = 1.7372e-04
Loss = 2.5506e-03, PNorm = 153.5511, GNorm = 0.1078, lr_0 = 1.7360e-04
Loss = 2.5903e-03, PNorm = 153.5542, GNorm = 0.2221, lr_0 = 1.7348e-04
Loss = 2.1887e-03, PNorm = 153.5537, GNorm = 0.1420, lr_0 = 1.7336e-04
Loss = 1.2201e-03, PNorm = 153.5534, GNorm = 0.0300, lr_0 = 1.7325e-04
Loss = 1.6600e-03, PNorm = 153.5535, GNorm = 0.0668, lr_0 = 1.7313e-04
Loss = 1.5285e-03, PNorm = 153.5557, GNorm = 0.1006, lr_0 = 1.7301e-04
Loss = 1.1500e-03, PNorm = 153.5588, GNorm = 0.0332, lr_0 = 1.7289e-04
Loss = 2.6504e-03, PNorm = 153.5610, GNorm = 0.1425, lr_0 = 1.7277e-04
Loss = 1.0734e-03, PNorm = 153.5649, GNorm = 0.1160, lr_0 = 1.7265e-04
Loss = 1.9207e-03, PNorm = 153.5672, GNorm = 0.2237, lr_0 = 1.7253e-04
Loss = 2.9582e-03, PNorm = 153.5706, GNorm = 0.0624, lr_0 = 1.7242e-04
Loss = 3.2965e-03, PNorm = 153.5747, GNorm = 0.1273, lr_0 = 1.7230e-04
Loss = 2.6492e-03, PNorm = 153.5780, GNorm = 0.0785, lr_0 = 1.7218e-04
Loss = 1.3107e-03, PNorm = 153.5815, GNorm = 0.1325, lr_0 = 1.7206e-04
Loss = 1.8541e-03, PNorm = 153.5833, GNorm = 0.0432, lr_0 = 1.7194e-04
Loss = 1.7454e-03, PNorm = 153.5837, GNorm = 0.1371, lr_0 = 1.7183e-04
Loss = 2.2719e-03, PNorm = 153.5862, GNorm = 0.0738, lr_0 = 1.7171e-04
Loss = 1.3179e-03, PNorm = 153.5901, GNorm = 0.0893, lr_0 = 1.7159e-04
Loss = 1.2548e-03, PNorm = 153.5915, GNorm = 0.0528, lr_0 = 1.7147e-04
Loss = 1.4470e-03, PNorm = 153.5932, GNorm = 0.1554, lr_0 = 1.7136e-04
Loss = 2.5187e-03, PNorm = 153.5954, GNorm = 0.0663, lr_0 = 1.7124e-04
Loss = 2.3242e-03, PNorm = 153.5978, GNorm = 0.0814, lr_0 = 1.7112e-04
Loss = 2.2202e-03, PNorm = 153.6018, GNorm = 0.0431, lr_0 = 1.7100e-04
Loss = 3.2935e-03, PNorm = 153.6053, GNorm = 0.2718, lr_0 = 1.7089e-04
Loss = 2.4802e-03, PNorm = 153.6101, GNorm = 0.2574, lr_0 = 1.7077e-04
Loss = 1.6794e-03, PNorm = 153.6126, GNorm = 0.1729, lr_0 = 1.7065e-04
Loss = 1.0527e-03, PNorm = 153.6144, GNorm = 0.0831, lr_0 = 1.7054e-04
Loss = 2.9249e-03, PNorm = 153.6181, GNorm = 0.0321, lr_0 = 1.7042e-04
Loss = 1.1201e-03, PNorm = 153.6226, GNorm = 0.0765, lr_0 = 1.7030e-04
Loss = 1.9367e-03, PNorm = 153.6227, GNorm = 0.0611, lr_0 = 1.7019e-04
Loss = 1.8260e-03, PNorm = 153.6246, GNorm = 0.0509, lr_0 = 1.7007e-04
Loss = 1.7690e-03, PNorm = 153.6260, GNorm = 0.0654, lr_0 = 1.6995e-04
Loss = 1.0770e-03, PNorm = 153.6283, GNorm = 0.1098, lr_0 = 1.6984e-04
Loss = 1.0868e-03, PNorm = 153.6313, GNorm = 0.0414, lr_0 = 1.6972e-04
Loss = 1.4841e-03, PNorm = 153.6335, GNorm = 0.0586, lr_0 = 1.6960e-04
Loss = 1.5008e-03, PNorm = 153.6365, GNorm = 0.2392, lr_0 = 1.6949e-04
Loss = 1.6501e-03, PNorm = 153.6385, GNorm = 0.0801, lr_0 = 1.6937e-04
Loss = 1.1409e-03, PNorm = 153.6425, GNorm = 0.1706, lr_0 = 1.6926e-04
Loss = 1.6681e-03, PNorm = 153.6455, GNorm = 0.0742, lr_0 = 1.6914e-04
Loss = 1.0746e-03, PNorm = 153.6474, GNorm = 0.0585, lr_0 = 1.6902e-04
Loss = 1.0676e-03, PNorm = 153.6501, GNorm = 0.0758, lr_0 = 1.6891e-04
Loss = 1.1850e-03, PNorm = 153.6531, GNorm = 0.0585, lr_0 = 1.6879e-04
Loss = 1.1261e-03, PNorm = 153.6552, GNorm = 0.0572, lr_0 = 1.6868e-04
Loss = 1.3999e-03, PNorm = 153.6575, GNorm = 0.1864, lr_0 = 1.6856e-04
Loss = 4.2104e-03, PNorm = 153.6610, GNorm = 0.0979, lr_0 = 1.6845e-04
Loss = 1.2867e-03, PNorm = 153.6645, GNorm = 0.0798, lr_0 = 1.6833e-04
Loss = 1.3110e-03, PNorm = 153.6658, GNorm = 0.0495, lr_0 = 1.6821e-04
Loss = 1.1191e-03, PNorm = 153.6694, GNorm = 0.0760, lr_0 = 1.6810e-04
Loss = 1.9116e-03, PNorm = 153.6736, GNorm = 0.0787, lr_0 = 1.6798e-04
Loss = 1.9931e-03, PNorm = 153.6783, GNorm = 0.0915, lr_0 = 1.6787e-04
Loss = 2.6403e-03, PNorm = 153.6830, GNorm = 0.0910, lr_0 = 1.6775e-04
Loss = 2.0384e-03, PNorm = 153.6841, GNorm = 0.4698, lr_0 = 1.6764e-04
Loss = 4.0735e-03, PNorm = 153.6868, GNorm = 0.1012, lr_0 = 1.6752e-04
Loss = 1.2927e-03, PNorm = 153.6913, GNorm = 0.0323, lr_0 = 1.6741e-04
Loss = 2.4167e-03, PNorm = 153.6938, GNorm = 0.0520, lr_0 = 1.6729e-04
Loss = 1.2961e-03, PNorm = 153.6952, GNorm = 0.0475, lr_0 = 1.6718e-04
Loss = 1.6721e-03, PNorm = 153.6989, GNorm = 0.1350, lr_0 = 1.6707e-04
Loss = 1.8126e-03, PNorm = 153.7039, GNorm = 0.1536, lr_0 = 1.6695e-04
Loss = 1.2190e-03, PNorm = 153.7080, GNorm = 0.2264, lr_0 = 1.6684e-04
Loss = 4.4032e-03, PNorm = 153.7112, GNorm = 0.4995, lr_0 = 1.6672e-04
Loss = 1.5805e-03, PNorm = 153.7169, GNorm = 0.1117, lr_0 = 1.6661e-04
Loss = 1.7526e-03, PNorm = 153.7181, GNorm = 0.1117, lr_0 = 1.6649e-04
Loss = 1.3742e-03, PNorm = 153.7194, GNorm = 0.0527, lr_0 = 1.6638e-04
Loss = 2.7962e-03, PNorm = 153.7233, GNorm = 0.0359, lr_0 = 1.6627e-04
Loss = 3.4559e-03, PNorm = 153.7274, GNorm = 0.0535, lr_0 = 1.6615e-04
Loss = 1.4735e-03, PNorm = 153.7318, GNorm = 0.1258, lr_0 = 1.6604e-04
Loss = 3.0121e-03, PNorm = 153.7350, GNorm = 0.0943, lr_0 = 1.6592e-04
Loss = 2.0005e-03, PNorm = 153.7346, GNorm = 0.1770, lr_0 = 1.6581e-04
Loss = 1.6609e-03, PNorm = 153.7356, GNorm = 0.0702, lr_0 = 1.6570e-04
Loss = 2.7867e-03, PNorm = 153.7380, GNorm = 0.1262, lr_0 = 1.6558e-04
Loss = 2.8360e-03, PNorm = 153.7423, GNorm = 0.1206, lr_0 = 1.6547e-04
Loss = 1.2806e-03, PNorm = 153.7444, GNorm = 0.0797, lr_0 = 1.6536e-04
Loss = 1.2447e-03, PNorm = 153.7494, GNorm = 0.1297, lr_0 = 1.6524e-04
Loss = 2.1298e-03, PNorm = 153.7520, GNorm = 0.0853, lr_0 = 1.6513e-04
Loss = 2.4589e-03, PNorm = 153.7541, GNorm = 0.1642, lr_0 = 1.6502e-04
Loss = 1.3537e-03, PNorm = 153.7556, GNorm = 0.1318, lr_0 = 1.6490e-04
Loss = 1.3705e-03, PNorm = 153.7579, GNorm = 0.0958, lr_0 = 1.6479e-04
Loss = 1.2569e-03, PNorm = 153.7587, GNorm = 0.1165, lr_0 = 1.6468e-04
Loss = 2.5482e-03, PNorm = 153.7617, GNorm = 0.2165, lr_0 = 1.6457e-04
Loss = 1.0849e-03, PNorm = 153.7662, GNorm = 0.1103, lr_0 = 1.6445e-04
Loss = 1.4933e-03, PNorm = 153.7695, GNorm = 0.1024, lr_0 = 1.6434e-04
Loss = 2.4368e-03, PNorm = 153.7743, GNorm = 0.1555, lr_0 = 1.6423e-04
Loss = 1.3327e-03, PNorm = 153.7781, GNorm = 0.1319, lr_0 = 1.6412e-04
Loss = 2.2616e-03, PNorm = 153.7792, GNorm = 0.1663, lr_0 = 1.6400e-04
Loss = 2.5929e-03, PNorm = 153.7811, GNorm = 0.1279, lr_0 = 1.6389e-04
Loss = 1.1409e-03, PNorm = 153.7863, GNorm = 0.0509, lr_0 = 1.6378e-04
Validation mae = 0.477326
Epoch 24
Loss = 1.5946e-03, PNorm = 153.7892, GNorm = 0.1002, lr_0 = 1.6367e-04
Loss = 1.7961e-03, PNorm = 153.7918, GNorm = 0.1606, lr_0 = 1.6355e-04
Loss = 2.3704e-03, PNorm = 153.7935, GNorm = 0.1528, lr_0 = 1.6344e-04
Loss = 1.0788e-03, PNorm = 153.7963, GNorm = 0.2034, lr_0 = 1.6333e-04
Loss = 1.2702e-03, PNorm = 153.7977, GNorm = 0.0591, lr_0 = 1.6322e-04
Loss = 1.6876e-03, PNorm = 153.8004, GNorm = 0.0675, lr_0 = 1.6311e-04
Loss = 1.5460e-03, PNorm = 153.8031, GNorm = 0.1245, lr_0 = 1.6299e-04
Loss = 9.2766e-04, PNorm = 153.8052, GNorm = 0.2285, lr_0 = 1.6288e-04
Loss = 2.7089e-03, PNorm = 153.8051, GNorm = 0.1793, lr_0 = 1.6277e-04
Loss = 1.4300e-03, PNorm = 153.8053, GNorm = 0.1110, lr_0 = 1.6266e-04
Loss = 1.7324e-03, PNorm = 153.8054, GNorm = 0.0669, lr_0 = 1.6255e-04
Loss = 1.0594e-03, PNorm = 153.8087, GNorm = 0.1393, lr_0 = 1.6244e-04
Loss = 1.5162e-03, PNorm = 153.8124, GNorm = 0.1051, lr_0 = 1.6233e-04
Loss = 2.6724e-03, PNorm = 153.8153, GNorm = 0.1446, lr_0 = 1.6221e-04
Loss = 1.0227e-03, PNorm = 153.8184, GNorm = 0.0695, lr_0 = 1.6210e-04
Loss = 2.2275e-03, PNorm = 153.8215, GNorm = 0.0654, lr_0 = 1.6199e-04
Loss = 1.0113e-03, PNorm = 153.8242, GNorm = 0.1211, lr_0 = 1.6188e-04
Loss = 1.0637e-03, PNorm = 153.8259, GNorm = 0.1644, lr_0 = 1.6177e-04
Loss = 2.6194e-03, PNorm = 153.8270, GNorm = 0.2271, lr_0 = 1.6166e-04
Loss = 1.7531e-03, PNorm = 153.8288, GNorm = 0.2201, lr_0 = 1.6155e-04
Loss = 1.7282e-03, PNorm = 153.8297, GNorm = 0.1502, lr_0 = 1.6144e-04
Loss = 1.3250e-03, PNorm = 153.8328, GNorm = 0.2413, lr_0 = 1.6133e-04
Loss = 1.3509e-03, PNorm = 153.8357, GNorm = 0.1085, lr_0 = 1.6122e-04
Loss = 1.7211e-03, PNorm = 153.8385, GNorm = 0.0606, lr_0 = 1.6111e-04
Loss = 1.3467e-03, PNorm = 153.8423, GNorm = 0.1179, lr_0 = 1.6100e-04
Loss = 1.3705e-03, PNorm = 153.8446, GNorm = 0.0720, lr_0 = 1.6089e-04
Loss = 9.1091e-04, PNorm = 153.8465, GNorm = 0.1314, lr_0 = 1.6078e-04
Loss = 1.5477e-03, PNorm = 153.8487, GNorm = 0.1768, lr_0 = 1.6067e-04
Loss = 8.2029e-04, PNorm = 153.8516, GNorm = 0.1042, lr_0 = 1.6056e-04
Loss = 2.7848e-03, PNorm = 153.8537, GNorm = 0.0449, lr_0 = 1.6045e-04
Loss = 1.8247e-03, PNorm = 153.8561, GNorm = 0.1157, lr_0 = 1.6034e-04
Loss = 9.8287e-04, PNorm = 153.8581, GNorm = 0.0796, lr_0 = 1.6023e-04
Loss = 1.4262e-03, PNorm = 153.8626, GNorm = 0.0909, lr_0 = 1.6012e-04
Loss = 1.3992e-03, PNorm = 153.8642, GNorm = 0.0567, lr_0 = 1.6001e-04
Loss = 1.1489e-03, PNorm = 153.8682, GNorm = 0.2346, lr_0 = 1.5990e-04
Loss = 2.0396e-03, PNorm = 153.8698, GNorm = 0.0664, lr_0 = 1.5979e-04
Loss = 1.4759e-03, PNorm = 153.8725, GNorm = 0.1201, lr_0 = 1.5968e-04
Loss = 2.3829e-03, PNorm = 153.8741, GNorm = 0.2191, lr_0 = 1.5957e-04
Loss = 2.4828e-03, PNorm = 153.8763, GNorm = 0.0522, lr_0 = 1.5946e-04
Loss = 1.5146e-03, PNorm = 153.8781, GNorm = 0.0944, lr_0 = 1.5935e-04
Loss = 2.0244e-03, PNorm = 153.8795, GNorm = 0.3300, lr_0 = 1.5924e-04
Loss = 1.9033e-03, PNorm = 153.8832, GNorm = 0.0611, lr_0 = 1.5913e-04
Loss = 1.6486e-03, PNorm = 153.8871, GNorm = 0.1125, lr_0 = 1.5902e-04
Loss = 2.8203e-03, PNorm = 153.8889, GNorm = 0.1602, lr_0 = 1.5891e-04
Loss = 1.4123e-03, PNorm = 153.8927, GNorm = 0.1827, lr_0 = 1.5880e-04
Loss = 1.0187e-03, PNorm = 153.8944, GNorm = 0.0545, lr_0 = 1.5870e-04
Loss = 1.5184e-03, PNorm = 153.8972, GNorm = 0.1167, lr_0 = 1.5859e-04
Loss = 3.0313e-03, PNorm = 153.8968, GNorm = 0.0965, lr_0 = 1.5848e-04
Loss = 1.8440e-03, PNorm = 153.9003, GNorm = 0.0986, lr_0 = 1.5837e-04
Loss = 8.8918e-04, PNorm = 153.9015, GNorm = 0.0807, lr_0 = 1.5826e-04
Loss = 9.0059e-04, PNorm = 153.9049, GNorm = 0.0440, lr_0 = 1.5815e-04
Loss = 1.3762e-03, PNorm = 153.9087, GNorm = 0.0675, lr_0 = 1.5804e-04
Loss = 1.4943e-03, PNorm = 153.9107, GNorm = 0.1147, lr_0 = 1.5794e-04
Loss = 1.0286e-03, PNorm = 153.9111, GNorm = 0.1570, lr_0 = 1.5783e-04
Loss = 1.5807e-03, PNorm = 153.9111, GNorm = 0.0889, lr_0 = 1.5772e-04
Loss = 2.3277e-03, PNorm = 153.9122, GNorm = 0.0601, lr_0 = 1.5761e-04
Loss = 1.1550e-03, PNorm = 153.9151, GNorm = 0.0989, lr_0 = 1.5750e-04
Loss = 1.5851e-03, PNorm = 153.9207, GNorm = 0.1167, lr_0 = 1.5740e-04
Loss = 1.6148e-03, PNorm = 153.9255, GNorm = 0.0501, lr_0 = 1.5729e-04
Loss = 9.5120e-04, PNorm = 153.9294, GNorm = 0.0766, lr_0 = 1.5718e-04
Loss = 2.0714e-03, PNorm = 153.9318, GNorm = 0.1751, lr_0 = 1.5707e-04
Loss = 2.4534e-03, PNorm = 153.9347, GNorm = 0.2162, lr_0 = 1.5697e-04
Loss = 1.0286e-03, PNorm = 153.9350, GNorm = 0.0641, lr_0 = 1.5686e-04
Loss = 2.4225e-03, PNorm = 153.9368, GNorm = 0.0737, lr_0 = 1.5675e-04
Loss = 1.8967e-03, PNorm = 153.9385, GNorm = 0.1644, lr_0 = 1.5664e-04
Loss = 1.5364e-03, PNorm = 153.9419, GNorm = 0.1442, lr_0 = 1.5654e-04
Loss = 1.2736e-03, PNorm = 153.9444, GNorm = 0.1102, lr_0 = 1.5643e-04
Loss = 1.4653e-03, PNorm = 153.9471, GNorm = 0.0671, lr_0 = 1.5632e-04
Loss = 1.7626e-03, PNorm = 153.9519, GNorm = 0.0671, lr_0 = 1.5621e-04
Loss = 1.1047e-03, PNorm = 153.9532, GNorm = 0.1434, lr_0 = 1.5611e-04
Loss = 1.5699e-03, PNorm = 153.9543, GNorm = 0.0984, lr_0 = 1.5600e-04
Loss = 2.4378e-03, PNorm = 153.9550, GNorm = 0.1072, lr_0 = 1.5589e-04
Loss = 1.1660e-03, PNorm = 153.9594, GNorm = 0.0796, lr_0 = 1.5579e-04
Loss = 1.1826e-03, PNorm = 153.9627, GNorm = 0.2052, lr_0 = 1.5568e-04
Loss = 1.6268e-03, PNorm = 153.9663, GNorm = 0.1088, lr_0 = 1.5557e-04
Loss = 1.8282e-03, PNorm = 153.9708, GNorm = 0.1759, lr_0 = 1.5547e-04
Loss = 1.0094e-03, PNorm = 153.9716, GNorm = 0.0949, lr_0 = 1.5536e-04
Loss = 3.8887e-03, PNorm = 153.9761, GNorm = 0.0584, lr_0 = 1.5525e-04
Loss = 4.2666e-03, PNorm = 153.9807, GNorm = 0.1608, lr_0 = 1.5515e-04
Loss = 8.4895e-04, PNorm = 153.9842, GNorm = 0.2109, lr_0 = 1.5504e-04
Loss = 1.4522e-03, PNorm = 153.9855, GNorm = 0.1200, lr_0 = 1.5493e-04
Loss = 1.1835e-03, PNorm = 153.9863, GNorm = 0.2161, lr_0 = 1.5483e-04
Loss = 1.5982e-03, PNorm = 153.9879, GNorm = 0.0812, lr_0 = 1.5472e-04
Loss = 8.8424e-04, PNorm = 153.9906, GNorm = 0.0282, lr_0 = 1.5462e-04
Loss = 1.6197e-03, PNorm = 153.9932, GNorm = 0.0446, lr_0 = 1.5451e-04
Loss = 1.1511e-03, PNorm = 153.9951, GNorm = 0.0838, lr_0 = 1.5440e-04
Loss = 8.7564e-04, PNorm = 153.9964, GNorm = 0.1409, lr_0 = 1.5430e-04
Loss = 9.1387e-04, PNorm = 153.9981, GNorm = 0.1188, lr_0 = 1.5419e-04
Loss = 3.6013e-03, PNorm = 154.0016, GNorm = 0.0648, lr_0 = 1.5409e-04
Loss = 1.7918e-03, PNorm = 154.0042, GNorm = 0.0381, lr_0 = 1.5398e-04
Loss = 1.3885e-03, PNorm = 154.0065, GNorm = 0.1094, lr_0 = 1.5388e-04
Loss = 1.0052e-03, PNorm = 154.0075, GNorm = 0.1077, lr_0 = 1.5377e-04
Loss = 1.9281e-03, PNorm = 154.0108, GNorm = 0.1012, lr_0 = 1.5367e-04
Loss = 1.4328e-03, PNorm = 154.0129, GNorm = 0.0972, lr_0 = 1.5356e-04
Loss = 2.9820e-03, PNorm = 154.0149, GNorm = 0.0720, lr_0 = 1.5346e-04
Loss = 1.6888e-03, PNorm = 154.0160, GNorm = 0.2378, lr_0 = 1.5335e-04
Loss = 1.0175e-03, PNorm = 154.0167, GNorm = 0.0590, lr_0 = 1.5325e-04
Loss = 2.3450e-03, PNorm = 154.0190, GNorm = 0.1116, lr_0 = 1.5314e-04
Loss = 1.8661e-03, PNorm = 154.0220, GNorm = 0.3453, lr_0 = 1.5304e-04
Loss = 4.9011e-03, PNorm = 154.0233, GNorm = 0.2291, lr_0 = 1.5293e-04
Loss = 1.4456e-03, PNorm = 154.0265, GNorm = 0.2014, lr_0 = 1.5283e-04
Loss = 4.2139e-03, PNorm = 154.0272, GNorm = 0.0922, lr_0 = 1.5272e-04
Loss = 1.1231e-03, PNorm = 154.0319, GNorm = 0.0327, lr_0 = 1.5262e-04
Loss = 1.1028e-03, PNorm = 154.0342, GNorm = 0.1463, lr_0 = 1.5251e-04
Loss = 2.0651e-03, PNorm = 154.0374, GNorm = 0.1603, lr_0 = 1.5241e-04
Loss = 1.0719e-03, PNorm = 154.0395, GNorm = 0.1406, lr_0 = 1.5230e-04
Loss = 1.5278e-03, PNorm = 154.0416, GNorm = 0.1616, lr_0 = 1.5220e-04
Loss = 1.7374e-03, PNorm = 154.0436, GNorm = 0.1504, lr_0 = 1.5209e-04
Loss = 1.0120e-03, PNorm = 154.0455, GNorm = 0.0442, lr_0 = 1.5199e-04
Loss = 1.4333e-03, PNorm = 154.0473, GNorm = 0.2013, lr_0 = 1.5189e-04
Loss = 1.6665e-03, PNorm = 154.0512, GNorm = 0.0827, lr_0 = 1.5178e-04
Loss = 1.7614e-03, PNorm = 154.0521, GNorm = 0.0818, lr_0 = 1.5168e-04
Loss = 2.1816e-03, PNorm = 154.0529, GNorm = 0.1253, lr_0 = 1.5157e-04
Loss = 2.3703e-03, PNorm = 154.0540, GNorm = 0.2345, lr_0 = 1.5147e-04
Loss = 9.5908e-04, PNorm = 154.0569, GNorm = 0.0932, lr_0 = 1.5137e-04
Loss = 9.8815e-04, PNorm = 154.0605, GNorm = 0.0991, lr_0 = 1.5126e-04
Loss = 9.5957e-04, PNorm = 154.0635, GNorm = 0.0662, lr_0 = 1.5116e-04
Loss = 1.8317e-03, PNorm = 154.0669, GNorm = 0.1053, lr_0 = 1.5106e-04
Loss = 9.1542e-04, PNorm = 154.0700, GNorm = 0.1390, lr_0 = 1.5095e-04
Loss = 2.9103e-03, PNorm = 154.0717, GNorm = 0.3271, lr_0 = 1.5085e-04
Validation mae = 0.477966
Epoch 25
Loss = 1.0101e-03, PNorm = 154.0711, GNorm = 0.1902, lr_0 = 1.5075e-04
Loss = 1.0941e-03, PNorm = 154.0724, GNorm = 0.1216, lr_0 = 1.5064e-04
Loss = 1.2138e-03, PNorm = 154.0754, GNorm = 0.0759, lr_0 = 1.5054e-04
Loss = 9.2843e-04, PNorm = 154.0781, GNorm = 0.0438, lr_0 = 1.5044e-04
Loss = 1.0777e-03, PNorm = 154.0809, GNorm = 0.0785, lr_0 = 1.5033e-04
Loss = 1.2010e-03, PNorm = 154.0827, GNorm = 0.0961, lr_0 = 1.5023e-04
Loss = 8.3744e-04, PNorm = 154.0857, GNorm = 0.1648, lr_0 = 1.5013e-04
Loss = 1.9350e-03, PNorm = 154.0866, GNorm = 0.0310, lr_0 = 1.5002e-04
Loss = 1.1040e-03, PNorm = 154.0893, GNorm = 0.0355, lr_0 = 1.4992e-04
Loss = 1.7631e-03, PNorm = 154.0905, GNorm = 0.0497, lr_0 = 1.4982e-04
Loss = 2.0574e-03, PNorm = 154.0917, GNorm = 0.0881, lr_0 = 1.4972e-04
Loss = 2.4351e-03, PNorm = 154.0926, GNorm = 0.0947, lr_0 = 1.4961e-04
Loss = 8.2033e-04, PNorm = 154.0950, GNorm = 0.0409, lr_0 = 1.4951e-04
Loss = 7.8121e-04, PNorm = 154.0968, GNorm = 0.0363, lr_0 = 1.4941e-04
Loss = 1.2502e-03, PNorm = 154.0991, GNorm = 0.1609, lr_0 = 1.4931e-04
Loss = 9.0598e-04, PNorm = 154.1014, GNorm = 0.0505, lr_0 = 1.4920e-04
Loss = 1.9739e-03, PNorm = 154.1020, GNorm = 0.1126, lr_0 = 1.4910e-04
Loss = 8.1849e-04, PNorm = 154.1036, GNorm = 0.0438, lr_0 = 1.4900e-04
Loss = 2.0384e-03, PNorm = 154.1058, GNorm = 0.0996, lr_0 = 1.4890e-04
Loss = 1.7248e-03, PNorm = 154.1070, GNorm = 0.0904, lr_0 = 1.4880e-04
Loss = 3.4401e-03, PNorm = 154.1089, GNorm = 0.1258, lr_0 = 1.4869e-04
Loss = 1.1242e-03, PNorm = 154.1099, GNorm = 0.0919, lr_0 = 1.4859e-04
Loss = 1.2189e-03, PNorm = 154.1109, GNorm = 0.0595, lr_0 = 1.4849e-04
Loss = 1.6740e-03, PNorm = 154.1141, GNorm = 0.1609, lr_0 = 1.4839e-04
Loss = 1.2970e-03, PNorm = 154.1163, GNorm = 0.1160, lr_0 = 1.4829e-04
Loss = 2.3655e-03, PNorm = 154.1194, GNorm = 0.0609, lr_0 = 1.4818e-04
Loss = 7.9652e-04, PNorm = 154.1217, GNorm = 0.1031, lr_0 = 1.4808e-04
Loss = 8.8564e-04, PNorm = 154.1244, GNorm = 0.0900, lr_0 = 1.4798e-04
Loss = 9.2433e-04, PNorm = 154.1271, GNorm = 0.0812, lr_0 = 1.4788e-04
Loss = 2.3058e-03, PNorm = 154.1305, GNorm = 0.1098, lr_0 = 1.4778e-04
Loss = 1.1123e-03, PNorm = 154.1324, GNorm = 0.1308, lr_0 = 1.4768e-04
Loss = 9.2964e-04, PNorm = 154.1365, GNorm = 0.0714, lr_0 = 1.4758e-04
Loss = 9.7604e-04, PNorm = 154.1380, GNorm = 0.0620, lr_0 = 1.4748e-04
Loss = 1.3938e-03, PNorm = 154.1395, GNorm = 0.1410, lr_0 = 1.4737e-04
Loss = 1.2777e-03, PNorm = 154.1419, GNorm = 0.1073, lr_0 = 1.4727e-04
Loss = 1.0300e-03, PNorm = 154.1437, GNorm = 0.1330, lr_0 = 1.4717e-04
Loss = 1.2645e-03, PNorm = 154.1458, GNorm = 0.0366, lr_0 = 1.4707e-04
Loss = 1.6608e-03, PNorm = 154.1472, GNorm = 0.0297, lr_0 = 1.4697e-04
Loss = 4.1561e-03, PNorm = 154.1478, GNorm = 0.1445, lr_0 = 1.4687e-04
Loss = 1.6342e-03, PNorm = 154.1528, GNorm = 0.1881, lr_0 = 1.4677e-04
Loss = 1.8934e-03, PNorm = 154.1545, GNorm = 0.2163, lr_0 = 1.4667e-04
Loss = 1.0138e-03, PNorm = 154.1577, GNorm = 0.0799, lr_0 = 1.4657e-04
Loss = 1.2592e-03, PNorm = 154.1604, GNorm = 0.0391, lr_0 = 1.4647e-04
Loss = 1.1315e-03, PNorm = 154.1618, GNorm = 0.1155, lr_0 = 1.4637e-04
Loss = 4.2111e-03, PNorm = 154.1638, GNorm = 0.1992, lr_0 = 1.4627e-04
Loss = 1.8845e-03, PNorm = 154.1654, GNorm = 0.1143, lr_0 = 1.4617e-04
Loss = 1.3582e-03, PNorm = 154.1683, GNorm = 0.0857, lr_0 = 1.4607e-04
Loss = 1.0864e-03, PNorm = 154.1714, GNorm = 0.0497, lr_0 = 1.4597e-04
Loss = 7.8194e-04, PNorm = 154.1726, GNorm = 0.0571, lr_0 = 1.4587e-04
Loss = 2.0052e-03, PNorm = 154.1747, GNorm = 0.2819, lr_0 = 1.4577e-04
Loss = 1.6315e-03, PNorm = 154.1751, GNorm = 0.3025, lr_0 = 1.4567e-04
Loss = 2.6110e-03, PNorm = 154.1751, GNorm = 0.1228, lr_0 = 1.4557e-04
Loss = 8.2555e-04, PNorm = 154.1756, GNorm = 0.1386, lr_0 = 1.4547e-04
Loss = 1.3084e-03, PNorm = 154.1778, GNorm = 0.0770, lr_0 = 1.4537e-04
Loss = 3.2330e-03, PNorm = 154.1818, GNorm = 0.0596, lr_0 = 1.4527e-04
Loss = 1.7229e-03, PNorm = 154.1850, GNorm = 0.0961, lr_0 = 1.4517e-04
Loss = 1.2159e-03, PNorm = 154.1885, GNorm = 0.2089, lr_0 = 1.4507e-04
Loss = 2.7275e-03, PNorm = 154.1903, GNorm = 0.2917, lr_0 = 1.4497e-04
Loss = 1.6201e-03, PNorm = 154.1913, GNorm = 0.1516, lr_0 = 1.4487e-04
Loss = 1.5252e-03, PNorm = 154.1946, GNorm = 0.0611, lr_0 = 1.4477e-04
Loss = 1.8330e-03, PNorm = 154.1969, GNorm = 0.0992, lr_0 = 1.4467e-04
Loss = 1.4758e-03, PNorm = 154.1987, GNorm = 0.2242, lr_0 = 1.4457e-04
Loss = 1.1755e-03, PNorm = 154.2010, GNorm = 0.0924, lr_0 = 1.4447e-04
Loss = 1.8902e-03, PNorm = 154.2054, GNorm = 0.3211, lr_0 = 1.4438e-04
Loss = 8.8725e-04, PNorm = 154.2076, GNorm = 0.0623, lr_0 = 1.4428e-04
Loss = 9.0502e-04, PNorm = 154.2101, GNorm = 0.0801, lr_0 = 1.4418e-04
Loss = 1.8245e-03, PNorm = 154.2111, GNorm = 0.0323, lr_0 = 1.4408e-04
Loss = 1.1386e-03, PNorm = 154.2132, GNorm = 0.1471, lr_0 = 1.4398e-04
Loss = 1.2684e-03, PNorm = 154.2138, GNorm = 0.1189, lr_0 = 1.4388e-04
Loss = 7.9526e-04, PNorm = 154.2153, GNorm = 0.1773, lr_0 = 1.4378e-04
Loss = 8.6847e-04, PNorm = 154.2176, GNorm = 0.1092, lr_0 = 1.4368e-04
Loss = 1.2852e-03, PNorm = 154.2205, GNorm = 0.1470, lr_0 = 1.4359e-04
Loss = 1.1192e-03, PNorm = 154.2229, GNorm = 0.1907, lr_0 = 1.4349e-04
Loss = 1.0034e-03, PNorm = 154.2248, GNorm = 0.0895, lr_0 = 1.4339e-04
Loss = 9.5512e-04, PNorm = 154.2264, GNorm = 0.0429, lr_0 = 1.4329e-04
Loss = 1.1373e-03, PNorm = 154.2285, GNorm = 0.1674, lr_0 = 1.4319e-04
Loss = 1.1543e-03, PNorm = 154.2288, GNorm = 0.0717, lr_0 = 1.4310e-04
Loss = 8.8392e-04, PNorm = 154.2290, GNorm = 0.0488, lr_0 = 1.4300e-04
Loss = 8.9472e-04, PNorm = 154.2309, GNorm = 0.0582, lr_0 = 1.4290e-04
Loss = 1.5397e-03, PNorm = 154.2333, GNorm = 0.1957, lr_0 = 1.4280e-04
Loss = 1.1159e-03, PNorm = 154.2338, GNorm = 0.1169, lr_0 = 1.4270e-04
Loss = 9.8522e-04, PNorm = 154.2355, GNorm = 0.2436, lr_0 = 1.4261e-04
Loss = 8.2031e-04, PNorm = 154.2374, GNorm = 0.0794, lr_0 = 1.4251e-04
Loss = 1.9979e-03, PNorm = 154.2415, GNorm = 0.0897, lr_0 = 1.4241e-04
Loss = 1.2668e-03, PNorm = 154.2436, GNorm = 0.1355, lr_0 = 1.4231e-04
Loss = 9.5904e-04, PNorm = 154.2476, GNorm = 0.0990, lr_0 = 1.4222e-04
Loss = 1.4433e-03, PNorm = 154.2510, GNorm = 0.1950, lr_0 = 1.4212e-04
Loss = 1.3912e-03, PNorm = 154.2538, GNorm = 0.0389, lr_0 = 1.4202e-04
Loss = 1.0036e-03, PNorm = 154.2562, GNorm = 0.0526, lr_0 = 1.4192e-04
Loss = 1.5345e-03, PNorm = 154.2572, GNorm = 0.1489, lr_0 = 1.4183e-04
Loss = 2.1757e-03, PNorm = 154.2578, GNorm = 0.2577, lr_0 = 1.4173e-04
Loss = 1.2572e-03, PNorm = 154.2601, GNorm = 0.0304, lr_0 = 1.4163e-04
Loss = 2.4893e-03, PNorm = 154.2634, GNorm = 0.3006, lr_0 = 1.4153e-04
Loss = 9.2586e-04, PNorm = 154.2660, GNorm = 0.0856, lr_0 = 1.4144e-04
Loss = 3.4970e-03, PNorm = 154.2688, GNorm = 0.2551, lr_0 = 1.4134e-04
Loss = 8.4543e-04, PNorm = 154.2708, GNorm = 0.0654, lr_0 = 1.4124e-04
Loss = 1.0018e-03, PNorm = 154.2747, GNorm = 0.1015, lr_0 = 1.4115e-04
Loss = 3.7536e-03, PNorm = 154.2759, GNorm = 0.1303, lr_0 = 1.4105e-04
Loss = 2.3516e-03, PNorm = 154.2754, GNorm = 0.2505, lr_0 = 1.4095e-04
Loss = 1.2729e-03, PNorm = 154.2767, GNorm = 0.2129, lr_0 = 1.4086e-04
Loss = 2.3634e-03, PNorm = 154.2807, GNorm = 0.0801, lr_0 = 1.4076e-04
Loss = 5.2223e-03, PNorm = 154.2822, GNorm = 0.0661, lr_0 = 1.4066e-04
Loss = 2.7405e-03, PNorm = 154.2842, GNorm = 0.0603, lr_0 = 1.4057e-04
Loss = 3.3750e-03, PNorm = 154.2861, GNorm = 0.2865, lr_0 = 1.4047e-04
Loss = 8.2862e-04, PNorm = 154.2895, GNorm = 0.1732, lr_0 = 1.4038e-04
Loss = 1.9348e-03, PNorm = 154.2922, GNorm = 0.1215, lr_0 = 1.4028e-04
Loss = 1.3379e-03, PNorm = 154.2955, GNorm = 0.1014, lr_0 = 1.4018e-04
Loss = 1.3630e-03, PNorm = 154.2971, GNorm = 0.0298, lr_0 = 1.4009e-04
Loss = 1.8467e-03, PNorm = 154.2989, GNorm = 0.3614, lr_0 = 1.3999e-04
Loss = 2.7302e-03, PNorm = 154.3040, GNorm = 0.4997, lr_0 = 1.3990e-04
Loss = 1.3680e-03, PNorm = 154.3086, GNorm = 0.1020, lr_0 = 1.3980e-04
Loss = 1.6123e-03, PNorm = 154.3109, GNorm = 0.1608, lr_0 = 1.3970e-04
Loss = 1.0751e-03, PNorm = 154.3125, GNorm = 0.1027, lr_0 = 1.3961e-04
Loss = 8.4533e-04, PNorm = 154.3134, GNorm = 0.1847, lr_0 = 1.3951e-04
Loss = 1.1691e-03, PNorm = 154.3154, GNorm = 0.2184, lr_0 = 1.3942e-04
Loss = 8.8266e-04, PNorm = 154.3156, GNorm = 0.0819, lr_0 = 1.3932e-04
Loss = 1.1883e-03, PNorm = 154.3175, GNorm = 0.1207, lr_0 = 1.3923e-04
Loss = 1.7719e-03, PNorm = 154.3205, GNorm = 0.1372, lr_0 = 1.3913e-04
Loss = 8.6920e-04, PNorm = 154.3218, GNorm = 0.0873, lr_0 = 1.3904e-04
Loss = 7.3834e-04, PNorm = 154.3240, GNorm = 0.0819, lr_0 = 1.3894e-04
Validation mae = 0.476984
Epoch 26
Loss = 1.3500e-03, PNorm = 154.3262, GNorm = 0.0258, lr_0 = 1.3884e-04
Loss = 7.8622e-04, PNorm = 154.3278, GNorm = 0.0456, lr_0 = 1.3875e-04
Loss = 1.2174e-03, PNorm = 154.3294, GNorm = 0.0637, lr_0 = 1.3865e-04
Loss = 1.2269e-03, PNorm = 154.3313, GNorm = 0.1220, lr_0 = 1.3856e-04
Loss = 6.3709e-04, PNorm = 154.3331, GNorm = 0.0654, lr_0 = 1.3846e-04
Loss = 8.3609e-04, PNorm = 154.3356, GNorm = 0.0749, lr_0 = 1.3837e-04
Loss = 1.2723e-03, PNorm = 154.3355, GNorm = 0.1584, lr_0 = 1.3828e-04
Loss = 1.2882e-03, PNorm = 154.3341, GNorm = 0.1589, lr_0 = 1.3818e-04
Loss = 2.5986e-03, PNorm = 154.3360, GNorm = 0.0939, lr_0 = 1.3809e-04
Loss = 7.3461e-04, PNorm = 154.3387, GNorm = 0.0441, lr_0 = 1.3799e-04
Loss = 9.4217e-04, PNorm = 154.3413, GNorm = 0.0601, lr_0 = 1.3790e-04
Loss = 1.7827e-03, PNorm = 154.3434, GNorm = 0.0503, lr_0 = 1.3780e-04
Loss = 7.5893e-04, PNorm = 154.3453, GNorm = 0.0677, lr_0 = 1.3771e-04
Loss = 9.0216e-04, PNorm = 154.3454, GNorm = 0.0557, lr_0 = 1.3761e-04
Loss = 1.1067e-03, PNorm = 154.3480, GNorm = 0.0773, lr_0 = 1.3752e-04
Loss = 9.3768e-04, PNorm = 154.3504, GNorm = 0.0518, lr_0 = 1.3742e-04
Loss = 3.9476e-03, PNorm = 154.3525, GNorm = 0.1304, lr_0 = 1.3733e-04
Loss = 1.5754e-03, PNorm = 154.3539, GNorm = 0.1149, lr_0 = 1.3724e-04
Loss = 1.3329e-03, PNorm = 154.3565, GNorm = 0.1550, lr_0 = 1.3714e-04
Loss = 1.8342e-03, PNorm = 154.3605, GNorm = 0.0386, lr_0 = 1.3705e-04
Loss = 7.7724e-04, PNorm = 154.3629, GNorm = 0.0684, lr_0 = 1.3695e-04
Loss = 6.2774e-04, PNorm = 154.3645, GNorm = 0.0746, lr_0 = 1.3686e-04
Loss = 1.2652e-03, PNorm = 154.3649, GNorm = 0.1424, lr_0 = 1.3677e-04
Loss = 7.1906e-04, PNorm = 154.3670, GNorm = 0.0709, lr_0 = 1.3667e-04
Loss = 1.1906e-03, PNorm = 154.3690, GNorm = 0.0266, lr_0 = 1.3658e-04
Loss = 7.8703e-04, PNorm = 154.3713, GNorm = 0.0344, lr_0 = 1.3649e-04
Loss = 7.0853e-04, PNorm = 154.3729, GNorm = 0.0859, lr_0 = 1.3639e-04
Loss = 1.0065e-03, PNorm = 154.3734, GNorm = 0.1127, lr_0 = 1.3630e-04
Loss = 1.1512e-03, PNorm = 154.3743, GNorm = 0.0551, lr_0 = 1.3621e-04
Loss = 6.9127e-04, PNorm = 154.3759, GNorm = 0.0410, lr_0 = 1.3611e-04
Loss = 1.4312e-03, PNorm = 154.3778, GNorm = 0.0570, lr_0 = 1.3602e-04
Loss = 9.6496e-04, PNorm = 154.3781, GNorm = 0.0597, lr_0 = 1.3593e-04
Loss = 8.2465e-04, PNorm = 154.3784, GNorm = 0.1164, lr_0 = 1.3583e-04
Loss = 9.7330e-04, PNorm = 154.3796, GNorm = 0.1525, lr_0 = 1.3574e-04
Loss = 7.0269e-04, PNorm = 154.3806, GNorm = 0.0526, lr_0 = 1.3565e-04
Loss = 1.0936e-03, PNorm = 154.3822, GNorm = 0.0566, lr_0 = 1.3555e-04
Loss = 1.2381e-03, PNorm = 154.3845, GNorm = 0.1102, lr_0 = 1.3546e-04
Loss = 6.9528e-04, PNorm = 154.3868, GNorm = 0.0330, lr_0 = 1.3537e-04
Loss = 7.7365e-04, PNorm = 154.3877, GNorm = 0.1347, lr_0 = 1.3528e-04
Loss = 1.0870e-03, PNorm = 154.3903, GNorm = 0.1292, lr_0 = 1.3518e-04
Loss = 2.4478e-03, PNorm = 154.3907, GNorm = 0.0706, lr_0 = 1.3509e-04
Loss = 1.9827e-03, PNorm = 154.3942, GNorm = 0.0684, lr_0 = 1.3500e-04
Loss = 1.3417e-03, PNorm = 154.3956, GNorm = 0.0829, lr_0 = 1.3491e-04
Loss = 1.0275e-03, PNorm = 154.3973, GNorm = 0.1161, lr_0 = 1.3481e-04
Loss = 2.4739e-03, PNorm = 154.3985, GNorm = 0.1756, lr_0 = 1.3472e-04
Loss = 1.2385e-03, PNorm = 154.4000, GNorm = 0.0557, lr_0 = 1.3463e-04
Loss = 1.3270e-03, PNorm = 154.4024, GNorm = 0.1136, lr_0 = 1.3454e-04
Loss = 7.7580e-04, PNorm = 154.4039, GNorm = 0.0202, lr_0 = 1.3444e-04
Loss = 8.3599e-04, PNorm = 154.4047, GNorm = 0.0318, lr_0 = 1.3435e-04
Loss = 7.0734e-04, PNorm = 154.4068, GNorm = 0.0464, lr_0 = 1.3426e-04
Loss = 1.6525e-03, PNorm = 154.4100, GNorm = 0.1579, lr_0 = 1.3417e-04
Loss = 7.2496e-04, PNorm = 154.4110, GNorm = 0.0866, lr_0 = 1.3408e-04
Loss = 1.3542e-03, PNorm = 154.4131, GNorm = 0.0835, lr_0 = 1.3398e-04
Loss = 2.0226e-03, PNorm = 154.4147, GNorm = 0.2022, lr_0 = 1.3389e-04
Loss = 2.5495e-03, PNorm = 154.4169, GNorm = 0.3954, lr_0 = 1.3380e-04
Loss = 1.2079e-03, PNorm = 154.4197, GNorm = 0.0871, lr_0 = 1.3371e-04
Loss = 2.0735e-03, PNorm = 154.4212, GNorm = 0.0573, lr_0 = 1.3362e-04
Loss = 2.3665e-03, PNorm = 154.4226, GNorm = 0.1083, lr_0 = 1.3353e-04
Loss = 1.6294e-03, PNorm = 154.4241, GNorm = 0.1505, lr_0 = 1.3343e-04
Loss = 2.1578e-03, PNorm = 154.4267, GNorm = 0.0903, lr_0 = 1.3334e-04
Loss = 1.6916e-03, PNorm = 154.4292, GNorm = 0.0568, lr_0 = 1.3325e-04
Loss = 3.0495e-03, PNorm = 154.4303, GNorm = 0.1614, lr_0 = 1.3316e-04
Loss = 1.1345e-03, PNorm = 154.4303, GNorm = 0.1102, lr_0 = 1.3307e-04
Loss = 1.7397e-03, PNorm = 154.4328, GNorm = 0.1592, lr_0 = 1.3298e-04
Loss = 1.1470e-03, PNorm = 154.4379, GNorm = 0.1488, lr_0 = 1.3289e-04
Loss = 3.5313e-03, PNorm = 154.4399, GNorm = 0.0563, lr_0 = 1.3280e-04
Loss = 9.9680e-04, PNorm = 154.4420, GNorm = 0.1659, lr_0 = 1.3270e-04
Loss = 9.1221e-04, PNorm = 154.4436, GNorm = 0.0367, lr_0 = 1.3261e-04
Loss = 7.8467e-04, PNorm = 154.4433, GNorm = 0.1307, lr_0 = 1.3252e-04
Loss = 8.2221e-04, PNorm = 154.4446, GNorm = 0.1128, lr_0 = 1.3243e-04
Loss = 1.2352e-03, PNorm = 154.4465, GNorm = 0.0774, lr_0 = 1.3234e-04
Loss = 6.6805e-04, PNorm = 154.4473, GNorm = 0.0701, lr_0 = 1.3225e-04
Loss = 3.4486e-03, PNorm = 154.4464, GNorm = 0.0727, lr_0 = 1.3216e-04
Loss = 7.9570e-04, PNorm = 154.4482, GNorm = 0.1184, lr_0 = 1.3207e-04
Loss = 1.6283e-03, PNorm = 154.4478, GNorm = 0.0942, lr_0 = 1.3198e-04
Loss = 3.0616e-03, PNorm = 154.4508, GNorm = 0.1669, lr_0 = 1.3189e-04
Loss = 1.4707e-03, PNorm = 154.4531, GNorm = 0.6182, lr_0 = 1.3180e-04
Loss = 8.0158e-04, PNorm = 154.4558, GNorm = 0.1176, lr_0 = 1.3171e-04
Loss = 1.4105e-03, PNorm = 154.4593, GNorm = 0.0653, lr_0 = 1.3162e-04
Loss = 1.8093e-03, PNorm = 154.4591, GNorm = 0.1258, lr_0 = 1.3153e-04
Loss = 3.5403e-03, PNorm = 154.4617, GNorm = 0.1224, lr_0 = 1.3144e-04
Loss = 1.8038e-03, PNorm = 154.4635, GNorm = 0.0397, lr_0 = 1.3135e-04
Loss = 5.8628e-04, PNorm = 154.4646, GNorm = 0.1060, lr_0 = 1.3126e-04
Loss = 3.5629e-03, PNorm = 154.4663, GNorm = 0.1144, lr_0 = 1.3117e-04
Loss = 7.5401e-04, PNorm = 154.4678, GNorm = 0.0334, lr_0 = 1.3108e-04
Loss = 9.1529e-04, PNorm = 154.4710, GNorm = 0.1406, lr_0 = 1.3099e-04
Loss = 1.2360e-03, PNorm = 154.4738, GNorm = 0.1222, lr_0 = 1.3090e-04
Loss = 1.0384e-03, PNorm = 154.4768, GNorm = 0.0382, lr_0 = 1.3081e-04
Loss = 1.5700e-03, PNorm = 154.4777, GNorm = 0.0821, lr_0 = 1.3072e-04
Loss = 1.6531e-03, PNorm = 154.4798, GNorm = 0.0488, lr_0 = 1.3063e-04
Loss = 1.1556e-03, PNorm = 154.4833, GNorm = 0.1131, lr_0 = 1.3054e-04
Loss = 1.6179e-03, PNorm = 154.4845, GNorm = 0.0901, lr_0 = 1.3045e-04
Loss = 2.1701e-03, PNorm = 154.4873, GNorm = 0.0983, lr_0 = 1.3036e-04
Loss = 1.2476e-03, PNorm = 154.4886, GNorm = 0.0656, lr_0 = 1.3027e-04
Loss = 7.4581e-04, PNorm = 154.4897, GNorm = 0.0870, lr_0 = 1.3018e-04
Loss = 2.0536e-03, PNorm = 154.4894, GNorm = 0.1192, lr_0 = 1.3009e-04
Loss = 1.0865e-03, PNorm = 154.4913, GNorm = 0.0517, lr_0 = 1.3000e-04
Loss = 7.0735e-04, PNorm = 154.4946, GNorm = 0.1182, lr_0 = 1.2992e-04
Loss = 8.8252e-04, PNorm = 154.4957, GNorm = 0.0609, lr_0 = 1.2983e-04
Loss = 8.2298e-04, PNorm = 154.4977, GNorm = 0.0381, lr_0 = 1.2974e-04
Loss = 2.7565e-03, PNorm = 154.4980, GNorm = 0.0854, lr_0 = 1.2965e-04
Loss = 9.7969e-04, PNorm = 154.4993, GNorm = 0.0610, lr_0 = 1.2956e-04
Loss = 1.0574e-03, PNorm = 154.5026, GNorm = 0.1333, lr_0 = 1.2947e-04
Loss = 8.7940e-04, PNorm = 154.5082, GNorm = 0.0735, lr_0 = 1.2938e-04
Loss = 2.1419e-03, PNorm = 154.5096, GNorm = 0.0387, lr_0 = 1.2929e-04
Loss = 3.0769e-03, PNorm = 154.5104, GNorm = 0.0344, lr_0 = 1.2921e-04
Loss = 1.2146e-03, PNorm = 154.5087, GNorm = 0.1124, lr_0 = 1.2912e-04
Loss = 8.2540e-04, PNorm = 154.5080, GNorm = 0.1693, lr_0 = 1.2903e-04
Loss = 1.3365e-03, PNorm = 154.5086, GNorm = 0.0690, lr_0 = 1.2894e-04
Loss = 1.0925e-03, PNorm = 154.5124, GNorm = 0.0372, lr_0 = 1.2885e-04
Loss = 8.1615e-04, PNorm = 154.5170, GNorm = 0.1084, lr_0 = 1.2876e-04
Loss = 1.2553e-03, PNorm = 154.5188, GNorm = 0.0477, lr_0 = 1.2867e-04
Loss = 2.0543e-03, PNorm = 154.5207, GNorm = 0.0741, lr_0 = 1.2859e-04
Loss = 1.2122e-03, PNorm = 154.5224, GNorm = 0.1018, lr_0 = 1.2850e-04
Loss = 3.4604e-03, PNorm = 154.5226, GNorm = 0.0693, lr_0 = 1.2841e-04
Loss = 1.3058e-03, PNorm = 154.5246, GNorm = 0.1637, lr_0 = 1.2832e-04
Loss = 6.5052e-04, PNorm = 154.5269, GNorm = 0.1499, lr_0 = 1.2823e-04
Loss = 1.0858e-03, PNorm = 154.5285, GNorm = 0.0580, lr_0 = 1.2815e-04
Loss = 1.4074e-03, PNorm = 154.5302, GNorm = 0.0844, lr_0 = 1.2806e-04
Loss = 9.6392e-04, PNorm = 154.5328, GNorm = 0.0890, lr_0 = 1.2797e-04
Validation mae = 0.477845
Epoch 27
Loss = 1.4281e-03, PNorm = 154.5343, GNorm = 0.0698, lr_0 = 1.2788e-04
Loss = 7.0346e-04, PNorm = 154.5357, GNorm = 0.1014, lr_0 = 1.2780e-04
Loss = 8.2514e-04, PNorm = 154.5378, GNorm = 0.1509, lr_0 = 1.2771e-04
Loss = 7.2466e-04, PNorm = 154.5384, GNorm = 0.1598, lr_0 = 1.2762e-04
Loss = 8.8836e-04, PNorm = 154.5393, GNorm = 0.1628, lr_0 = 1.2753e-04
Loss = 2.0934e-03, PNorm = 154.5404, GNorm = 0.0846, lr_0 = 1.2745e-04
Loss = 3.4820e-03, PNorm = 154.5430, GNorm = 0.2307, lr_0 = 1.2736e-04
Loss = 6.7887e-04, PNorm = 154.5460, GNorm = 0.2095, lr_0 = 1.2727e-04
Loss = 1.2020e-03, PNorm = 154.5468, GNorm = 0.0834, lr_0 = 1.2718e-04
Loss = 6.9903e-04, PNorm = 154.5485, GNorm = 0.1081, lr_0 = 1.2710e-04
Loss = 1.2426e-03, PNorm = 154.5502, GNorm = 0.0861, lr_0 = 1.2701e-04
Loss = 7.9099e-04, PNorm = 154.5529, GNorm = 0.0877, lr_0 = 1.2692e-04
Loss = 1.3620e-03, PNorm = 154.5555, GNorm = 0.0848, lr_0 = 1.2684e-04
Loss = 1.0138e-03, PNorm = 154.5585, GNorm = 0.0547, lr_0 = 1.2675e-04
Loss = 1.0779e-03, PNorm = 154.5589, GNorm = 0.0832, lr_0 = 1.2666e-04
Loss = 7.7316e-04, PNorm = 154.5594, GNorm = 0.1148, lr_0 = 1.2658e-04
Loss = 5.2829e-04, PNorm = 154.5604, GNorm = 0.1039, lr_0 = 1.2649e-04
Loss = 1.2486e-03, PNorm = 154.5613, GNorm = 0.2502, lr_0 = 1.2640e-04
Loss = 1.8025e-03, PNorm = 154.5606, GNorm = 0.2570, lr_0 = 1.2632e-04
Loss = 1.7220e-03, PNorm = 154.5621, GNorm = 0.1738, lr_0 = 1.2623e-04
Loss = 5.4650e-04, PNorm = 154.5626, GNorm = 0.0570, lr_0 = 1.2614e-04
Loss = 1.6143e-03, PNorm = 154.5643, GNorm = 0.0420, lr_0 = 1.2606e-04
Loss = 9.8508e-04, PNorm = 154.5657, GNorm = 0.0599, lr_0 = 1.2597e-04
Loss = 1.4210e-03, PNorm = 154.5686, GNorm = 0.1087, lr_0 = 1.2588e-04
Loss = 3.2981e-03, PNorm = 154.5726, GNorm = 0.0733, lr_0 = 1.2580e-04
Loss = 3.1394e-03, PNorm = 154.5746, GNorm = 0.1155, lr_0 = 1.2571e-04
Loss = 7.4288e-04, PNorm = 154.5771, GNorm = 0.0412, lr_0 = 1.2563e-04
Loss = 1.3558e-03, PNorm = 154.5781, GNorm = 0.0786, lr_0 = 1.2554e-04
Loss = 1.4504e-03, PNorm = 154.5785, GNorm = 0.1179, lr_0 = 1.2545e-04
Loss = 1.0485e-03, PNorm = 154.5796, GNorm = 0.1104, lr_0 = 1.2537e-04
Loss = 7.7218e-04, PNorm = 154.5814, GNorm = 0.1279, lr_0 = 1.2528e-04
Loss = 9.0609e-04, PNorm = 154.5838, GNorm = 0.0632, lr_0 = 1.2520e-04
Loss = 6.7608e-04, PNorm = 154.5858, GNorm = 0.1223, lr_0 = 1.2511e-04
Loss = 6.1434e-04, PNorm = 154.5849, GNorm = 0.0716, lr_0 = 1.2502e-04
Loss = 1.6927e-03, PNorm = 154.5851, GNorm = 0.1212, lr_0 = 1.2494e-04
Loss = 1.2606e-03, PNorm = 154.5864, GNorm = 0.1143, lr_0 = 1.2485e-04
Loss = 6.0096e-04, PNorm = 154.5878, GNorm = 0.0471, lr_0 = 1.2477e-04
Loss = 6.7248e-04, PNorm = 154.5890, GNorm = 0.0759, lr_0 = 1.2468e-04
Loss = 1.1351e-03, PNorm = 154.5905, GNorm = 0.0243, lr_0 = 1.2460e-04
Loss = 2.2326e-03, PNorm = 154.5897, GNorm = 0.0695, lr_0 = 1.2451e-04
Loss = 2.2926e-03, PNorm = 154.5912, GNorm = 0.0599, lr_0 = 1.2443e-04
Loss = 1.5789e-03, PNorm = 154.5933, GNorm = 0.0497, lr_0 = 1.2434e-04
Loss = 1.0295e-03, PNorm = 154.5940, GNorm = 0.0439, lr_0 = 1.2426e-04
Loss = 7.6026e-04, PNorm = 154.5944, GNorm = 0.0890, lr_0 = 1.2417e-04
Loss = 7.5561e-04, PNorm = 154.5970, GNorm = 0.0686, lr_0 = 1.2409e-04
Loss = 6.6298e-04, PNorm = 154.5988, GNorm = 0.0748, lr_0 = 1.2400e-04
Loss = 2.8106e-03, PNorm = 154.6000, GNorm = 0.0663, lr_0 = 1.2392e-04
Loss = 1.0643e-03, PNorm = 154.6020, GNorm = 0.0981, lr_0 = 1.2383e-04
Loss = 1.0370e-03, PNorm = 154.6045, GNorm = 0.1073, lr_0 = 1.2375e-04
Loss = 6.9587e-04, PNorm = 154.6051, GNorm = 0.1032, lr_0 = 1.2366e-04
Loss = 1.6335e-03, PNorm = 154.6063, GNorm = 0.0405, lr_0 = 1.2358e-04
Loss = 1.1119e-03, PNorm = 154.6077, GNorm = 0.0860, lr_0 = 1.2349e-04
Loss = 1.8998e-03, PNorm = 154.6076, GNorm = 0.0675, lr_0 = 1.2341e-04
Loss = 8.8277e-04, PNorm = 154.6076, GNorm = 0.1997, lr_0 = 1.2332e-04
Loss = 2.4800e-03, PNorm = 154.6079, GNorm = 0.3214, lr_0 = 1.2324e-04
Loss = 1.1553e-03, PNorm = 154.6087, GNorm = 0.0955, lr_0 = 1.2315e-04
Loss = 1.3770e-03, PNorm = 154.6106, GNorm = 0.1417, lr_0 = 1.2307e-04
Loss = 2.5111e-03, PNorm = 154.6115, GNorm = 0.4129, lr_0 = 1.2298e-04
Loss = 1.1562e-03, PNorm = 154.6136, GNorm = 0.0911, lr_0 = 1.2290e-04
Loss = 1.8155e-03, PNorm = 154.6167, GNorm = 0.2573, lr_0 = 1.2282e-04
Loss = 8.9941e-04, PNorm = 154.6195, GNorm = 0.2138, lr_0 = 1.2273e-04
Loss = 1.2928e-03, PNorm = 154.6206, GNorm = 0.0943, lr_0 = 1.2265e-04
Loss = 1.3415e-03, PNorm = 154.6242, GNorm = 0.0446, lr_0 = 1.2256e-04
Loss = 1.8182e-03, PNorm = 154.6257, GNorm = 0.2116, lr_0 = 1.2248e-04
Loss = 5.4627e-04, PNorm = 154.6278, GNorm = 0.0356, lr_0 = 1.2240e-04
Loss = 1.0297e-03, PNorm = 154.6287, GNorm = 0.0303, lr_0 = 1.2231e-04
Loss = 7.0954e-04, PNorm = 154.6300, GNorm = 0.1126, lr_0 = 1.2223e-04
Loss = 2.0371e-03, PNorm = 154.6313, GNorm = 0.1459, lr_0 = 1.2214e-04
Loss = 6.7181e-04, PNorm = 154.6325, GNorm = 0.1382, lr_0 = 1.2206e-04
Loss = 1.1196e-03, PNorm = 154.6323, GNorm = 0.0449, lr_0 = 1.2198e-04
Loss = 1.7738e-03, PNorm = 154.6341, GNorm = 0.1259, lr_0 = 1.2189e-04
Loss = 6.6840e-04, PNorm = 154.6358, GNorm = 0.0382, lr_0 = 1.2181e-04
Loss = 6.6379e-04, PNorm = 154.6376, GNorm = 0.0481, lr_0 = 1.2173e-04
Loss = 1.1889e-03, PNorm = 154.6401, GNorm = 0.1032, lr_0 = 1.2164e-04
Loss = 1.9160e-03, PNorm = 154.6418, GNorm = 0.1148, lr_0 = 1.2156e-04
Loss = 1.0080e-03, PNorm = 154.6430, GNorm = 0.0353, lr_0 = 1.2148e-04
Loss = 7.3018e-04, PNorm = 154.6440, GNorm = 0.0363, lr_0 = 1.2139e-04
Loss = 7.9708e-04, PNorm = 154.6454, GNorm = 0.0475, lr_0 = 1.2131e-04
Loss = 6.4846e-04, PNorm = 154.6478, GNorm = 0.0448, lr_0 = 1.2123e-04
Loss = 1.2245e-03, PNorm = 154.6482, GNorm = 0.2569, lr_0 = 1.2114e-04
Loss = 8.3084e-04, PNorm = 154.6491, GNorm = 0.0376, lr_0 = 1.2106e-04
Loss = 1.4022e-03, PNorm = 154.6494, GNorm = 0.2226, lr_0 = 1.2098e-04
Loss = 6.1880e-04, PNorm = 154.6515, GNorm = 0.0663, lr_0 = 1.2090e-04
Loss = 8.8978e-04, PNorm = 154.6538, GNorm = 0.0633, lr_0 = 1.2081e-04
Loss = 1.0610e-03, PNorm = 154.6563, GNorm = 0.0450, lr_0 = 1.2073e-04
Loss = 2.2317e-03, PNorm = 154.6578, GNorm = 0.1545, lr_0 = 1.2065e-04
Loss = 2.9129e-03, PNorm = 154.6597, GNorm = 0.1156, lr_0 = 1.2056e-04
Loss = 1.5013e-03, PNorm = 154.6620, GNorm = 0.0338, lr_0 = 1.2048e-04
Loss = 2.6707e-03, PNorm = 154.6639, GNorm = 0.0506, lr_0 = 1.2040e-04
Loss = 1.2273e-03, PNorm = 154.6650, GNorm = 0.1482, lr_0 = 1.2032e-04
Loss = 7.7900e-04, PNorm = 154.6658, GNorm = 0.0466, lr_0 = 1.2023e-04
Loss = 6.8903e-04, PNorm = 154.6643, GNorm = 0.0222, lr_0 = 1.2015e-04
Loss = 5.8480e-04, PNorm = 154.6653, GNorm = 0.0767, lr_0 = 1.2007e-04
Loss = 1.2784e-03, PNorm = 154.6661, GNorm = 0.1207, lr_0 = 1.1999e-04
Loss = 8.5519e-04, PNorm = 154.6667, GNorm = 0.0819, lr_0 = 1.1991e-04
Loss = 1.0061e-03, PNorm = 154.6670, GNorm = 0.0656, lr_0 = 1.1982e-04
Loss = 2.1830e-03, PNorm = 154.6679, GNorm = 0.0698, lr_0 = 1.1974e-04
Loss = 1.5325e-03, PNorm = 154.6688, GNorm = 0.0274, lr_0 = 1.1966e-04
Loss = 1.8791e-03, PNorm = 154.6694, GNorm = 0.0797, lr_0 = 1.1958e-04
Loss = 1.0885e-03, PNorm = 154.6717, GNorm = 0.0791, lr_0 = 1.1950e-04
Loss = 2.8779e-03, PNorm = 154.6744, GNorm = 0.1371, lr_0 = 1.1941e-04
Loss = 1.4663e-03, PNorm = 154.6766, GNorm = 0.2435, lr_0 = 1.1933e-04
Loss = 1.4936e-03, PNorm = 154.6772, GNorm = 0.1051, lr_0 = 1.1925e-04
Loss = 5.7780e-04, PNorm = 154.6775, GNorm = 0.0292, lr_0 = 1.1917e-04
Loss = 2.7570e-03, PNorm = 154.6799, GNorm = 0.0632, lr_0 = 1.1909e-04
Loss = 6.1814e-04, PNorm = 154.6826, GNorm = 0.0752, lr_0 = 1.1901e-04
Loss = 9.5422e-04, PNorm = 154.6833, GNorm = 0.0905, lr_0 = 1.1892e-04
Loss = 9.8695e-04, PNorm = 154.6864, GNorm = 0.1059, lr_0 = 1.1884e-04
Loss = 7.5924e-04, PNorm = 154.6863, GNorm = 0.2021, lr_0 = 1.1876e-04
Loss = 7.1822e-04, PNorm = 154.6885, GNorm = 0.0438, lr_0 = 1.1868e-04
Loss = 9.0537e-04, PNorm = 154.6895, GNorm = 0.1181, lr_0 = 1.1860e-04
Loss = 2.3523e-03, PNorm = 154.6901, GNorm = 0.0769, lr_0 = 1.1852e-04
Loss = 8.8756e-04, PNorm = 154.6927, GNorm = 0.0565, lr_0 = 1.1844e-04
Loss = 1.0947e-03, PNorm = 154.6922, GNorm = 0.0772, lr_0 = 1.1835e-04
Loss = 1.0668e-03, PNorm = 154.6934, GNorm = 0.1054, lr_0 = 1.1827e-04
Loss = 1.5966e-03, PNorm = 154.6932, GNorm = 0.0385, lr_0 = 1.1819e-04
Loss = 6.5479e-04, PNorm = 154.6935, GNorm = 0.0470, lr_0 = 1.1811e-04
Loss = 1.6596e-03, PNorm = 154.6952, GNorm = 0.1067, lr_0 = 1.1803e-04
Loss = 1.5373e-03, PNorm = 154.6957, GNorm = 0.0510, lr_0 = 1.1795e-04
Loss = 1.4275e-03, PNorm = 154.6975, GNorm = 0.0655, lr_0 = 1.1787e-04
Validation mae = 0.476633
Epoch 28
Loss = 9.5797e-04, PNorm = 154.6997, GNorm = 0.1112, lr_0 = 1.1779e-04
Loss = 1.1551e-03, PNorm = 154.7012, GNorm = 0.1025, lr_0 = 1.1771e-04
Loss = 6.7086e-04, PNorm = 154.7052, GNorm = 0.1484, lr_0 = 1.1763e-04
Loss = 1.6008e-03, PNorm = 154.7075, GNorm = 0.0964, lr_0 = 1.1755e-04
Loss = 2.0106e-03, PNorm = 154.7104, GNorm = 0.0826, lr_0 = 1.1747e-04
Loss = 7.1658e-04, PNorm = 154.7126, GNorm = 0.0674, lr_0 = 1.1739e-04
Loss = 8.7607e-04, PNorm = 154.7134, GNorm = 0.0966, lr_0 = 1.1730e-04
Loss = 7.6188e-04, PNorm = 154.7145, GNorm = 0.0890, lr_0 = 1.1722e-04
Loss = 1.5699e-03, PNorm = 154.7138, GNorm = 0.0976, lr_0 = 1.1714e-04
Loss = 1.1136e-03, PNorm = 154.7145, GNorm = 0.1315, lr_0 = 1.1706e-04
Loss = 8.3000e-04, PNorm = 154.7163, GNorm = 0.0695, lr_0 = 1.1698e-04
Loss = 4.5140e-04, PNorm = 154.7167, GNorm = 0.1140, lr_0 = 1.1690e-04
Loss = 1.4809e-03, PNorm = 154.7174, GNorm = 0.1578, lr_0 = 1.1682e-04
Loss = 8.9709e-04, PNorm = 154.7196, GNorm = 0.0638, lr_0 = 1.1674e-04
Loss = 5.3905e-04, PNorm = 154.7220, GNorm = 0.0881, lr_0 = 1.1666e-04
Loss = 1.7658e-03, PNorm = 154.7230, GNorm = 0.0307, lr_0 = 1.1658e-04
Loss = 6.6628e-04, PNorm = 154.7240, GNorm = 0.1143, lr_0 = 1.1650e-04
Loss = 1.1084e-03, PNorm = 154.7242, GNorm = 0.0310, lr_0 = 1.1642e-04
Loss = 1.6860e-03, PNorm = 154.7266, GNorm = 0.0649, lr_0 = 1.1634e-04
Loss = 5.4242e-04, PNorm = 154.7281, GNorm = 0.0349, lr_0 = 1.1626e-04
Loss = 5.7887e-04, PNorm = 154.7289, GNorm = 0.0545, lr_0 = 1.1618e-04
Loss = 1.2661e-03, PNorm = 154.7304, GNorm = 0.0404, lr_0 = 1.1611e-04
Loss = 1.7667e-03, PNorm = 154.7330, GNorm = 0.1665, lr_0 = 1.1603e-04
Loss = 1.5754e-03, PNorm = 154.7356, GNorm = 0.0278, lr_0 = 1.1595e-04
Loss = 1.0181e-03, PNorm = 154.7371, GNorm = 0.1766, lr_0 = 1.1587e-04
Loss = 1.1462e-03, PNorm = 154.7388, GNorm = 0.1296, lr_0 = 1.1579e-04
Loss = 1.4774e-03, PNorm = 154.7401, GNorm = 0.1350, lr_0 = 1.1571e-04
Loss = 8.5487e-04, PNorm = 154.7419, GNorm = 0.0341, lr_0 = 1.1563e-04
Loss = 1.6746e-03, PNorm = 154.7434, GNorm = 0.0860, lr_0 = 1.1555e-04
Loss = 1.0447e-03, PNorm = 154.7437, GNorm = 0.0819, lr_0 = 1.1547e-04
Loss = 6.9777e-04, PNorm = 154.7441, GNorm = 0.0270, lr_0 = 1.1539e-04
Loss = 5.3518e-04, PNorm = 154.7441, GNorm = 0.0631, lr_0 = 1.1531e-04
Loss = 7.0883e-04, PNorm = 154.7443, GNorm = 0.1058, lr_0 = 1.1523e-04
Loss = 1.3750e-03, PNorm = 154.7455, GNorm = 0.0335, lr_0 = 1.1515e-04
Loss = 6.5838e-04, PNorm = 154.7466, GNorm = 0.0681, lr_0 = 1.1508e-04
Loss = 9.9720e-04, PNorm = 154.7463, GNorm = 0.2743, lr_0 = 1.1500e-04
Loss = 9.9740e-04, PNorm = 154.7475, GNorm = 0.0386, lr_0 = 1.1492e-04
Loss = 1.1287e-03, PNorm = 154.7497, GNorm = 0.1007, lr_0 = 1.1484e-04
Loss = 1.9228e-03, PNorm = 154.7514, GNorm = 0.1165, lr_0 = 1.1476e-04
Loss = 1.0184e-03, PNorm = 154.7530, GNorm = 0.1415, lr_0 = 1.1468e-04
Loss = 2.5031e-03, PNorm = 154.7543, GNorm = 0.1646, lr_0 = 1.1460e-04
Loss = 1.3377e-03, PNorm = 154.7541, GNorm = 0.0510, lr_0 = 1.1452e-04
Loss = 5.1021e-04, PNorm = 154.7559, GNorm = 0.0527, lr_0 = 1.1445e-04
Loss = 4.5291e-04, PNorm = 154.7572, GNorm = 0.0800, lr_0 = 1.1437e-04
Loss = 1.7594e-03, PNorm = 154.7586, GNorm = 0.0301, lr_0 = 1.1429e-04
Loss = 2.1954e-03, PNorm = 154.7607, GNorm = 0.1819, lr_0 = 1.1421e-04
Loss = 1.6312e-03, PNorm = 154.7614, GNorm = 0.1018, lr_0 = 1.1413e-04
Loss = 4.8565e-04, PNorm = 154.7637, GNorm = 0.0383, lr_0 = 1.1405e-04
Loss = 1.4409e-03, PNorm = 154.7647, GNorm = 0.1653, lr_0 = 1.1398e-04
Loss = 9.4227e-04, PNorm = 154.7659, GNorm = 0.1191, lr_0 = 1.1390e-04
Loss = 1.0233e-03, PNorm = 154.7674, GNorm = 0.0696, lr_0 = 1.1382e-04
Loss = 1.1122e-03, PNorm = 154.7683, GNorm = 0.1688, lr_0 = 1.1374e-04
Loss = 9.3314e-04, PNorm = 154.7692, GNorm = 0.0556, lr_0 = 1.1366e-04
Loss = 1.0778e-03, PNorm = 154.7703, GNorm = 0.1219, lr_0 = 1.1359e-04
Loss = 7.7188e-04, PNorm = 154.7722, GNorm = 0.0669, lr_0 = 1.1351e-04
Loss = 1.5796e-03, PNorm = 154.7733, GNorm = 0.0419, lr_0 = 1.1343e-04
Loss = 8.5434e-04, PNorm = 154.7751, GNorm = 0.0838, lr_0 = 1.1335e-04
Loss = 7.5053e-04, PNorm = 154.7770, GNorm = 0.0929, lr_0 = 1.1328e-04
Loss = 1.0302e-03, PNorm = 154.7793, GNorm = 0.0451, lr_0 = 1.1320e-04
Loss = 1.9729e-03, PNorm = 154.7797, GNorm = 0.0794, lr_0 = 1.1312e-04
Loss = 6.7725e-04, PNorm = 154.7809, GNorm = 0.0489, lr_0 = 1.1304e-04
Loss = 1.0744e-03, PNorm = 154.7833, GNorm = 0.0827, lr_0 = 1.1297e-04
Loss = 9.0676e-04, PNorm = 154.7837, GNorm = 0.1361, lr_0 = 1.1289e-04
Loss = 5.7524e-04, PNorm = 154.7849, GNorm = 0.0646, lr_0 = 1.1281e-04
Loss = 9.4173e-04, PNorm = 154.7861, GNorm = 0.0598, lr_0 = 1.1273e-04
Loss = 4.2570e-03, PNorm = 154.7872, GNorm = 1.0348, lr_0 = 1.1266e-04
Loss = 5.8815e-04, PNorm = 154.7874, GNorm = 0.1168, lr_0 = 1.1258e-04
Loss = 5.6908e-04, PNorm = 154.7884, GNorm = 0.1359, lr_0 = 1.1250e-04
Loss = 2.2251e-03, PNorm = 154.7885, GNorm = 0.0723, lr_0 = 1.1243e-04
Loss = 1.0615e-03, PNorm = 154.7893, GNorm = 0.0383, lr_0 = 1.1235e-04
Loss = 2.1461e-03, PNorm = 154.7918, GNorm = 0.1833, lr_0 = 1.1227e-04
Loss = 6.1113e-04, PNorm = 154.7941, GNorm = 0.0764, lr_0 = 1.1219e-04
Loss = 8.3929e-04, PNorm = 154.7951, GNorm = 0.1374, lr_0 = 1.1212e-04
Loss = 2.2164e-03, PNorm = 154.7977, GNorm = 0.1249, lr_0 = 1.1204e-04
Loss = 1.2196e-03, PNorm = 154.7997, GNorm = 0.1143, lr_0 = 1.1196e-04
Loss = 1.2429e-03, PNorm = 154.8015, GNorm = 0.1081, lr_0 = 1.1189e-04
Loss = 9.0469e-04, PNorm = 154.8022, GNorm = 0.1062, lr_0 = 1.1181e-04
Loss = 5.2343e-04, PNorm = 154.8036, GNorm = 0.2068, lr_0 = 1.1173e-04
Loss = 6.9809e-04, PNorm = 154.8050, GNorm = 0.0472, lr_0 = 1.1166e-04
Loss = 1.9600e-03, PNorm = 154.8063, GNorm = 0.0826, lr_0 = 1.1158e-04
Loss = 7.7861e-04, PNorm = 154.8075, GNorm = 0.0803, lr_0 = 1.1150e-04
Loss = 9.8806e-04, PNorm = 154.8094, GNorm = 0.1441, lr_0 = 1.1143e-04
Loss = 8.6123e-04, PNorm = 154.8112, GNorm = 0.0213, lr_0 = 1.1135e-04
Loss = 7.9399e-04, PNorm = 154.8132, GNorm = 0.2061, lr_0 = 1.1128e-04
Loss = 8.0127e-04, PNorm = 154.8155, GNorm = 0.1115, lr_0 = 1.1120e-04
Loss = 5.5520e-04, PNorm = 154.8170, GNorm = 0.0743, lr_0 = 1.1112e-04
Loss = 6.0105e-04, PNorm = 154.8157, GNorm = 0.1340, lr_0 = 1.1105e-04
Loss = 4.8407e-03, PNorm = 154.8155, GNorm = 0.0850, lr_0 = 1.1097e-04
Loss = 7.0885e-04, PNorm = 154.8151, GNorm = 0.0326, lr_0 = 1.1089e-04
Loss = 1.9853e-03, PNorm = 154.8157, GNorm = 0.0271, lr_0 = 1.1082e-04
Loss = 1.4709e-03, PNorm = 154.8182, GNorm = 0.0949, lr_0 = 1.1074e-04
Loss = 2.7535e-03, PNorm = 154.8187, GNorm = 0.1721, lr_0 = 1.1067e-04
Loss = 4.9088e-04, PNorm = 154.8204, GNorm = 0.0633, lr_0 = 1.1059e-04
Loss = 5.0633e-04, PNorm = 154.8204, GNorm = 0.0427, lr_0 = 1.1052e-04
Loss = 1.4497e-03, PNorm = 154.8217, GNorm = 0.1155, lr_0 = 1.1044e-04
Loss = 1.2069e-03, PNorm = 154.8230, GNorm = 0.0468, lr_0 = 1.1036e-04
Loss = 7.6163e-04, PNorm = 154.8232, GNorm = 0.0774, lr_0 = 1.1029e-04
Loss = 1.6068e-03, PNorm = 154.8239, GNorm = 0.1261, lr_0 = 1.1021e-04
Loss = 1.1866e-03, PNorm = 154.8250, GNorm = 0.1285, lr_0 = 1.1014e-04
Loss = 6.6033e-04, PNorm = 154.8260, GNorm = 0.1119, lr_0 = 1.1006e-04
Loss = 6.0714e-04, PNorm = 154.8279, GNorm = 0.0824, lr_0 = 1.0999e-04
Loss = 1.4567e-03, PNorm = 154.8291, GNorm = 0.0471, lr_0 = 1.0991e-04
Loss = 1.5578e-03, PNorm = 154.8305, GNorm = 0.1001, lr_0 = 1.0984e-04
Loss = 1.2512e-03, PNorm = 154.8314, GNorm = 0.3888, lr_0 = 1.0976e-04
Loss = 9.4791e-04, PNorm = 154.8326, GNorm = 0.0513, lr_0 = 1.0969e-04
Loss = 1.4051e-03, PNorm = 154.8348, GNorm = 0.0909, lr_0 = 1.0961e-04
Loss = 5.5756e-04, PNorm = 154.8356, GNorm = 0.2395, lr_0 = 1.0954e-04
Loss = 4.7047e-04, PNorm = 154.8364, GNorm = 0.0786, lr_0 = 1.0946e-04
Loss = 8.1555e-04, PNorm = 154.8372, GNorm = 0.0538, lr_0 = 1.0939e-04
Loss = 5.7800e-04, PNorm = 154.8385, GNorm = 0.0822, lr_0 = 1.0931e-04
Loss = 1.0122e-03, PNorm = 154.8386, GNorm = 0.0799, lr_0 = 1.0924e-04
Loss = 8.0163e-04, PNorm = 154.8400, GNorm = 0.0885, lr_0 = 1.0916e-04
Loss = 1.1716e-03, PNorm = 154.8413, GNorm = 0.0514, lr_0 = 1.0909e-04
Loss = 1.1872e-03, PNorm = 154.8439, GNorm = 0.1664, lr_0 = 1.0901e-04
Loss = 3.8008e-03, PNorm = 154.8445, GNorm = 0.0814, lr_0 = 1.0894e-04
Loss = 1.1034e-03, PNorm = 154.8451, GNorm = 0.0641, lr_0 = 1.0886e-04
Loss = 1.4118e-03, PNorm = 154.8445, GNorm = 0.1024, lr_0 = 1.0879e-04
Loss = 1.1430e-03, PNorm = 154.8451, GNorm = 0.1491, lr_0 = 1.0871e-04
Loss = 6.3485e-04, PNorm = 154.8466, GNorm = 0.1085, lr_0 = 1.0864e-04
Loss = 1.9769e-03, PNorm = 154.8476, GNorm = 0.0365, lr_0 = 1.0856e-04
Validation mae = 0.476795
Epoch 29
Loss = 6.8880e-04, PNorm = 154.8487, GNorm = 0.1017, lr_0 = 1.0849e-04
Loss = 1.1268e-03, PNorm = 154.8491, GNorm = 0.0345, lr_0 = 1.0841e-04
Loss = 5.0852e-04, PNorm = 154.8518, GNorm = 0.0433, lr_0 = 1.0834e-04
Loss = 6.5571e-04, PNorm = 154.8532, GNorm = 0.0828, lr_0 = 1.0827e-04
Loss = 1.2627e-03, PNorm = 154.8547, GNorm = 0.0858, lr_0 = 1.0819e-04
Loss = 6.9196e-04, PNorm = 154.8546, GNorm = 0.1263, lr_0 = 1.0812e-04
Loss = 7.6688e-04, PNorm = 154.8559, GNorm = 0.0726, lr_0 = 1.0804e-04
Loss = 2.0910e-03, PNorm = 154.8557, GNorm = 0.0289, lr_0 = 1.0797e-04
Loss = 8.2204e-04, PNorm = 154.8564, GNorm = 0.0239, lr_0 = 1.0790e-04
Loss = 8.3264e-04, PNorm = 154.8568, GNorm = 0.0594, lr_0 = 1.0782e-04
Loss = 5.6377e-04, PNorm = 154.8583, GNorm = 0.0380, lr_0 = 1.0775e-04
Loss = 1.7217e-03, PNorm = 154.8596, GNorm = 0.3157, lr_0 = 1.0767e-04
Loss = 6.4264e-04, PNorm = 154.8608, GNorm = 0.1356, lr_0 = 1.0760e-04
Loss = 2.7409e-03, PNorm = 154.8610, GNorm = 0.0566, lr_0 = 1.0753e-04
Loss = 8.2485e-04, PNorm = 154.8618, GNorm = 0.0797, lr_0 = 1.0745e-04
Loss = 4.7440e-04, PNorm = 154.8624, GNorm = 0.0383, lr_0 = 1.0738e-04
Loss = 8.0489e-04, PNorm = 154.8632, GNorm = 0.0879, lr_0 = 1.0731e-04
Loss = 8.2297e-04, PNorm = 154.8650, GNorm = 0.0313, lr_0 = 1.0723e-04
Loss = 2.5560e-03, PNorm = 154.8671, GNorm = 0.0867, lr_0 = 1.0716e-04
Loss = 5.5358e-04, PNorm = 154.8687, GNorm = 0.0799, lr_0 = 1.0709e-04
Loss = 9.5384e-04, PNorm = 154.8698, GNorm = 0.0403, lr_0 = 1.0701e-04
Loss = 2.9092e-03, PNorm = 154.8705, GNorm = 0.4713, lr_0 = 1.0694e-04
Loss = 7.3876e-04, PNorm = 154.8712, GNorm = 0.0526, lr_0 = 1.0687e-04
Loss = 1.1994e-03, PNorm = 154.8726, GNorm = 0.1058, lr_0 = 1.0679e-04
Loss = 1.0881e-03, PNorm = 154.8746, GNorm = 0.1021, lr_0 = 1.0672e-04
Loss = 1.3046e-03, PNorm = 154.8745, GNorm = 0.0638, lr_0 = 1.0665e-04
Loss = 2.2536e-03, PNorm = 154.8724, GNorm = 0.0345, lr_0 = 1.0657e-04
Loss = 4.1707e-04, PNorm = 154.8720, GNorm = 0.0474, lr_0 = 1.0650e-04
Loss = 1.0855e-03, PNorm = 154.8745, GNorm = 0.0511, lr_0 = 1.0643e-04
Loss = 6.2786e-04, PNorm = 154.8767, GNorm = 0.0456, lr_0 = 1.0635e-04
Loss = 3.9174e-04, PNorm = 154.8777, GNorm = 0.0650, lr_0 = 1.0628e-04
Loss = 1.2885e-03, PNorm = 154.8771, GNorm = 0.2252, lr_0 = 1.0621e-04
Loss = 5.5235e-04, PNorm = 154.8782, GNorm = 0.1303, lr_0 = 1.0614e-04
Loss = 2.1894e-03, PNorm = 154.8802, GNorm = 0.2908, lr_0 = 1.0606e-04
Loss = 5.6699e-04, PNorm = 154.8822, GNorm = 0.0266, lr_0 = 1.0599e-04
Loss = 4.1277e-04, PNorm = 154.8838, GNorm = 0.0553, lr_0 = 1.0592e-04
Loss = 6.6782e-04, PNorm = 154.8848, GNorm = 0.0712, lr_0 = 1.0585e-04
Loss = 2.2689e-03, PNorm = 154.8861, GNorm = 0.0999, lr_0 = 1.0577e-04
Loss = 1.3567e-03, PNorm = 154.8859, GNorm = 0.1417, lr_0 = 1.0570e-04
Loss = 3.5046e-04, PNorm = 154.8863, GNorm = 0.1019, lr_0 = 1.0563e-04
Loss = 4.5125e-04, PNorm = 154.8868, GNorm = 0.0317, lr_0 = 1.0556e-04
Loss = 8.9908e-04, PNorm = 154.8889, GNorm = 0.1555, lr_0 = 1.0548e-04
Loss = 9.7313e-04, PNorm = 154.8902, GNorm = 0.0251, lr_0 = 1.0541e-04
Loss = 6.6842e-04, PNorm = 154.8907, GNorm = 0.0687, lr_0 = 1.0534e-04
Loss = 6.6955e-04, PNorm = 154.8924, GNorm = 0.1044, lr_0 = 1.0527e-04
Loss = 8.9407e-04, PNorm = 154.8917, GNorm = 0.1232, lr_0 = 1.0519e-04
Loss = 1.7103e-03, PNorm = 154.8924, GNorm = 0.1348, lr_0 = 1.0512e-04
Loss = 7.1941e-04, PNorm = 154.8923, GNorm = 0.1073, lr_0 = 1.0505e-04
Loss = 7.5229e-04, PNorm = 154.8929, GNorm = 0.0623, lr_0 = 1.0498e-04
Loss = 6.7833e-04, PNorm = 154.8938, GNorm = 0.0687, lr_0 = 1.0491e-04
Loss = 6.5645e-04, PNorm = 154.8947, GNorm = 0.0382, lr_0 = 1.0483e-04
Loss = 6.8043e-04, PNorm = 154.8956, GNorm = 0.0635, lr_0 = 1.0476e-04
Loss = 6.8992e-04, PNorm = 154.8963, GNorm = 0.0456, lr_0 = 1.0469e-04
Loss = 1.4854e-03, PNorm = 154.8976, GNorm = 0.1282, lr_0 = 1.0462e-04
Loss = 4.1817e-04, PNorm = 154.8986, GNorm = 0.0362, lr_0 = 1.0455e-04
Loss = 4.1558e-04, PNorm = 154.8994, GNorm = 0.0623, lr_0 = 1.0448e-04
Loss = 8.6289e-04, PNorm = 154.9003, GNorm = 0.0684, lr_0 = 1.0440e-04
Loss = 1.3677e-03, PNorm = 154.9015, GNorm = 0.2138, lr_0 = 1.0433e-04
Loss = 1.4260e-03, PNorm = 154.9046, GNorm = 0.0708, lr_0 = 1.0426e-04
Loss = 9.3235e-04, PNorm = 154.9057, GNorm = 0.0811, lr_0 = 1.0419e-04
Loss = 5.1096e-04, PNorm = 154.9077, GNorm = 0.0228, lr_0 = 1.0412e-04
Loss = 1.5882e-03, PNorm = 154.9080, GNorm = 0.0954, lr_0 = 1.0405e-04
Loss = 2.0064e-03, PNorm = 154.9092, GNorm = 0.0604, lr_0 = 1.0398e-04
Loss = 6.4189e-04, PNorm = 154.9096, GNorm = 0.0920, lr_0 = 1.0391e-04
Loss = 1.4188e-03, PNorm = 154.9096, GNorm = 0.0947, lr_0 = 1.0383e-04
Loss = 9.7817e-04, PNorm = 154.9098, GNorm = 0.1106, lr_0 = 1.0376e-04
Loss = 1.1650e-03, PNorm = 154.9102, GNorm = 0.0687, lr_0 = 1.0369e-04
Loss = 3.1836e-03, PNorm = 154.9109, GNorm = 0.2248, lr_0 = 1.0362e-04
Loss = 1.4902e-03, PNorm = 154.9130, GNorm = 0.0617, lr_0 = 1.0355e-04
Loss = 2.0164e-03, PNorm = 154.9142, GNorm = 0.0576, lr_0 = 1.0348e-04
Loss = 9.9228e-04, PNorm = 154.9144, GNorm = 0.0294, lr_0 = 1.0341e-04
Loss = 4.0356e-04, PNorm = 154.9145, GNorm = 0.0997, lr_0 = 1.0334e-04
Loss = 3.8640e-04, PNorm = 154.9158, GNorm = 0.0193, lr_0 = 1.0327e-04
Loss = 1.6051e-03, PNorm = 154.9167, GNorm = 0.0473, lr_0 = 1.0320e-04
Loss = 8.4049e-04, PNorm = 154.9180, GNorm = 0.0719, lr_0 = 1.0312e-04
Loss = 5.1080e-04, PNorm = 154.9176, GNorm = 0.1029, lr_0 = 1.0305e-04
Loss = 7.9121e-04, PNorm = 154.9187, GNorm = 0.0551, lr_0 = 1.0298e-04
Loss = 5.0301e-04, PNorm = 154.9191, GNorm = 0.0562, lr_0 = 1.0291e-04
Loss = 1.6212e-03, PNorm = 154.9217, GNorm = 0.1047, lr_0 = 1.0284e-04
Loss = 1.7956e-03, PNorm = 154.9246, GNorm = 0.0681, lr_0 = 1.0277e-04
Loss = 9.3851e-04, PNorm = 154.9248, GNorm = 0.0497, lr_0 = 1.0270e-04
Loss = 1.9391e-03, PNorm = 154.9272, GNorm = 0.0523, lr_0 = 1.0263e-04
Loss = 3.5566e-03, PNorm = 154.9291, GNorm = 0.0396, lr_0 = 1.0256e-04
Loss = 1.1157e-03, PNorm = 154.9306, GNorm = 0.0634, lr_0 = 1.0249e-04
Loss = 3.9075e-04, PNorm = 154.9331, GNorm = 0.0785, lr_0 = 1.0242e-04
Loss = 1.1737e-03, PNorm = 154.9342, GNorm = 0.0207, lr_0 = 1.0235e-04
Loss = 1.4323e-03, PNorm = 154.9336, GNorm = 0.0721, lr_0 = 1.0228e-04
Loss = 3.9630e-04, PNorm = 154.9346, GNorm = 0.0418, lr_0 = 1.0221e-04
Loss = 8.8435e-04, PNorm = 154.9361, GNorm = 0.0349, lr_0 = 1.0214e-04
Loss = 8.6235e-04, PNorm = 154.9379, GNorm = 0.2952, lr_0 = 1.0207e-04
Loss = 1.4927e-03, PNorm = 154.9389, GNorm = 0.2645, lr_0 = 1.0200e-04
Loss = 1.1157e-03, PNorm = 154.9412, GNorm = 0.3321, lr_0 = 1.0193e-04
Loss = 4.1945e-04, PNorm = 154.9425, GNorm = 0.0558, lr_0 = 1.0186e-04
Loss = 5.6882e-04, PNorm = 154.9442, GNorm = 0.1905, lr_0 = 1.0179e-04
Loss = 1.2831e-03, PNorm = 154.9452, GNorm = 0.1080, lr_0 = 1.0172e-04
Loss = 9.1277e-04, PNorm = 154.9480, GNorm = 0.0159, lr_0 = 1.0165e-04
Loss = 4.9861e-04, PNorm = 154.9492, GNorm = 0.0661, lr_0 = 1.0158e-04
Loss = 6.7977e-04, PNorm = 154.9506, GNorm = 0.0969, lr_0 = 1.0151e-04
Loss = 2.2728e-03, PNorm = 154.9508, GNorm = 0.0424, lr_0 = 1.0144e-04
Loss = 1.2286e-03, PNorm = 154.9513, GNorm = 0.0713, lr_0 = 1.0137e-04
Loss = 1.7412e-03, PNorm = 154.9511, GNorm = 0.0590, lr_0 = 1.0130e-04
Loss = 9.5110e-04, PNorm = 154.9521, GNorm = 0.0790, lr_0 = 1.0123e-04
Loss = 3.0458e-03, PNorm = 154.9537, GNorm = 0.0313, lr_0 = 1.0116e-04
Loss = 4.1515e-04, PNorm = 154.9546, GNorm = 0.0181, lr_0 = 1.0110e-04
Loss = 8.2807e-04, PNorm = 154.9557, GNorm = 0.1180, lr_0 = 1.0103e-04
Loss = 7.4406e-04, PNorm = 154.9573, GNorm = 0.0858, lr_0 = 1.0096e-04
Loss = 6.9092e-04, PNorm = 154.9577, GNorm = 0.1082, lr_0 = 1.0089e-04
Loss = 4.2486e-04, PNorm = 154.9583, GNorm = 0.0530, lr_0 = 1.0082e-04
Loss = 1.0462e-03, PNorm = 154.9602, GNorm = 0.0392, lr_0 = 1.0075e-04
Loss = 1.3989e-03, PNorm = 154.9624, GNorm = 0.0708, lr_0 = 1.0068e-04
Loss = 6.8356e-04, PNorm = 154.9640, GNorm = 0.0204, lr_0 = 1.0061e-04
Loss = 4.0713e-04, PNorm = 154.9639, GNorm = 0.1268, lr_0 = 1.0054e-04
Loss = 1.0745e-03, PNorm = 154.9636, GNorm = 0.0912, lr_0 = 1.0047e-04
Loss = 9.1454e-04, PNorm = 154.9637, GNorm = 0.1187, lr_0 = 1.0041e-04
Loss = 1.0315e-03, PNorm = 154.9652, GNorm = 0.0420, lr_0 = 1.0034e-04
Loss = 4.9232e-04, PNorm = 154.9668, GNorm = 0.0587, lr_0 = 1.0027e-04
Loss = 1.0202e-03, PNorm = 154.9675, GNorm = 0.1192, lr_0 = 1.0020e-04
Loss = 3.5240e-03, PNorm = 154.9670, GNorm = 0.2033, lr_0 = 1.0013e-04
Loss = 1.6240e-03, PNorm = 154.9690, GNorm = 0.1033, lr_0 = 1.0006e-04
Loss = 7.0678e-04, PNorm = 154.9711, GNorm = 0.0801, lr_0 = 1.0000e-04
Validation mae = 0.476905
Model 0 best validation mae = 0.476633 on epoch 27
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.454713
Ensemble test mae = 0.454713
10-fold cross validation
	Seed 0 ==> test mae = 0.455711
	Seed 1 ==> test mae = 0.454582
	Seed 2 ==> test mae = 0.454301
	Seed 3 ==> test mae = 0.455806
	Seed 4 ==> test mae = 0.453829
	Seed 5 ==> test mae = 0.454501
	Seed 6 ==> test mae = 0.456409
	Seed 7 ==> test mae = 0.454259
	Seed 8 ==> test mae = 0.457525
	Seed 9 ==> test mae = 0.454713
Overall test mae = 0.455164 +/- 0.001103
Elapsed time = 4:39:14
